diff --git a/.gitmodules b/.gitmodules
index 796f7b17c3..81d066b8b0 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,7 +1,7 @@
 [submodule "3rdparty/Megatron-LM"]
 	path = 3rdparty/Megatron-LM-workspace/Megatron-LM
-	url = https://github.com/terrykong/Megatron-LM.git
-	branch = yuya/nemo-rl-use-dev
+	url = https://github.com/yaoyu-33/Megatron-LM.git
+	branch = main
 	shallow = true
 [submodule "3rdparty/Megatron-Bridge"]
 	path = 3rdparty/Megatron-Bridge-workspace/Megatron-Bridge
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 879c913a6e..ced9ef0418 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -57,15 +57,18 @@ repos:
   # intend to merge. Without it, you might run experiments with one config, but when merging upstream,
   # the config could silently fall back to the base defaults—resulting in different hyperparameters.
   #
-  # For example, we’ve seen cases where an SFT recipe runs without a custom chat_template. When merged,
-  # it unexpectedly picks up the default recommended chat_template from upstream, which doesn’t match
+  # For example, we've seen cases where an SFT recipe runs without a custom chat_template. When merged,
+  # it unexpectedly picks up the default recommended chat_template from upstream, which doesn't match
   # the original experiment setup.
   #
   # If this check is disruptive, you can disable the pre-commit hook locally. However, before a recipe
   # is accepted upstream, we expect the config to be minimized.
+  #
+  # The minimize-check command infers the base config from each recipe's `defaults` key, so it
+  # correctly handles inheritance chains (e.g., child → parent → grandparent).
   - repo: local
     hooks:
-      - id: configs-minimize-check-llm
+      - id: configs-minimize-check
         name: minimize-check llm recipes
         language: system
         pass_filenames: false
@@ -74,17 +77,4 @@ repos:
           - -lc
           - |
             set -euo pipefail
-            base="examples/configs/dpo.yaml"; for f in examples/configs/recipes/llm/dpo-*.yaml; do [ -e "$f" ] && ./tools/config_cli.py minimize-check "$base" "$f"; done
-            base="examples/configs/grpo_math_1B.yaml"; for f in examples/configs/recipes/llm/grpo-*.yaml; do [ -e "$f" ] && ./tools/config_cli.py minimize-check "$base" "$f"; done
-            base="examples/configs/sft.yaml"; for f in examples/configs/recipes/llm/sft-*.yaml; do [ -e "$f" ] && ./tools/config_cli.py minimize-check "$base" "$f"; done
-            base="examples/configs/distillation_math.yaml"; for f in examples/configs/recipes/llm/distillation-*.yaml; do [ -e "$f" ] && ./tools/config_cli.py minimize-check "$base" "$f"; done
-      - id: configs-minimize-check-vlm
-        name: minimize-check vlm recipes
-        language: system
-        pass_filenames: false
-        entry: bash
-        args:
-          - -lc
-          - |
-            set -euo pipefail
-            base="examples/configs/vlm_grpo_3B.yaml"; for f in examples/configs/recipes/vlm/vlm_grpo-*.yaml; do [ -e "$f" ] && ./tools/config_cli.py minimize-check "$base" "$f"; done
+            for f in examples/configs/recipes/{llm,vlm}/*.yaml; do if [ -e "$f" ]; then ./tools/config_cli.py minimize-check "$f"; fi; done  # "if" instead of "&&": an unmatched glob on the last iteration must not leave a non-zero exit status
\ No newline at end of file
diff --git a/3rdparty/Gym-workspace/Gym b/3rdparty/Gym-workspace/Gym
index c192ee407f..23cdeb3807 160000
--- a/3rdparty/Gym-workspace/Gym
+++ b/3rdparty/Gym-workspace/Gym
@@ -1 +1 @@
-Subproject commit c192ee407ff71046015d11da7c8960082bd62418
+Subproject commit 23cdeb38077d7b72a5fbae0927a2e1a74bfc15f7
diff --git a/3rdparty/Gym-workspace/pyproject.toml b/3rdparty/Gym-workspace/pyproject.toml
deleted file mode 100644
index dfda26adaf..0000000000
--- a/3rdparty/Gym-workspace/pyproject.toml
+++ /dev/null
@@ -1,10 +0,0 @@
-[build-system]
-requires = ["setuptools>=61.0", "wheel"]
-build-backend = "setuptools.build_meta"
-
-[project]
-name = "nemo_gym"
-dynamic = ["dependencies", "version"]
-authors = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }]
-description = "Standalone packaging for the Gym sub-module."
-requires-python = ">=3.10"
diff --git a/3rdparty/Gym-workspace/setup.py b/3rdparty/Gym-workspace/setup.py
deleted file mode 100644
index b6df0d66c0..0000000000
--- a/3rdparty/Gym-workspace/setup.py
+++ /dev/null
@@ -1,113 +0,0 @@
-# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import sys
-import tomllib
-from pathlib import Path
-
-import setuptools
-
-final_packages = []
-final_package_dir = {}
-
-# If the submodule is present, expose `nemo_gym` package from the checkout
-src_dir = Path("Gym")
-
-
-CACHED_DEPENDENCIES = [
-    "openai<=2.6.1",
-    "tqdm",
-    "pydantic",
-    "pydantic_core",
-    "devtools",
-    "fastapi",
-    "uvicorn",
-    "uvloop",
-    "hydra-core",
-    "omegaconf",
-    "gradio",
-    "mlflow",
-    "tdigest>=0.5.2.2",
-    "aiohttp",
-    "yappi",
-    "ray[default]",
-    "psutil",
-    "datasets",
-]
-
-if src_dir.exists():
-    pyproject_toml_path = src_dir / "pyproject.toml"
-    with pyproject_toml_path.open("rb") as f:
-        pyproject_toml = tomllib.load(f)
-    if not pyproject_toml_path.exists():
-        raise FileNotFoundError(
-            f"[Gym][setup] {pyproject_toml_path} not found; skipping dependency consistency check."
-        )
-
-    packages = pyproject_toml["tool"]["setuptools"]["packages"]["find"]["include"]
-
-    for package in packages:
-        final_packages.append(package)
-        final_package_dir[package] = src_dir / package
-
-    actual_dependencies = pyproject_toml["project"]["dependencies"]
-
-    ########################################
-    # Compare cached dependencies with the submodule's pyproject
-    ########################################
-
-    missing_in_cached = set(actual_dependencies) - set(CACHED_DEPENDENCIES)
-    extra_in_cached = set(CACHED_DEPENDENCIES) - set(actual_dependencies)
-
-    if missing_in_cached or extra_in_cached:
-        print(
-            "[Gym][setup] Dependency mismatch between Gym-workspace/Gym/pyproject.toml vs Gym-workspace/setup.py::CACHED_DEPENDENCIES.",
-            file=sys.stderr,
-        )
-        if missing_in_cached:
-            print(
-                "  - Present in Gym-workspace/Gym/pyproject.toml but missing from CACHED_DEPENDENCIES:",
-                file=sys.stderr,
-            )
-            for dep in sorted(missing_in_cached):
-                print(f"    * {dep}", file=sys.stderr)
-        if extra_in_cached:
-            print(
-                "  - Present in CACHED_DEPENDENCIES but not in Gym-workspace/Gym/pyproject.toml:",
-                file=sys.stderr,
-            )
-            for dep in sorted(extra_in_cached):
-                print(f"    * {dep}", file=sys.stderr)
-        print(
-            "  Please update CACHED_DEPENDENCIES or the submodule pyproject to keep them in sync.",
-            file=sys.stderr,
-        )
-        sys.exit(1)
-    else:
-        print(
-            "[Gym][setup] Dependency sets are consistent with the submodule pyproject.",
-            file=sys.stderr,
-        )
-
-
-setuptools.setup(
-    name="nemo_gym",
-    version="0.0.0",
-    description="Standalone packaging for the Gym sub-module.",
-    author="NVIDIA",
-    author_email="nemo-toolkit@nvidia.com",
-    packages=final_packages,
-    package_dir=final_package_dir,
-    py_modules=["is_nemo_gym_installed"],
-    install_requires=CACHED_DEPENDENCIES,
-)
diff --git a/3rdparty/Megatron-Bridge-workspace/Megatron-Bridge b/3rdparty/Megatron-Bridge-workspace/Megatron-Bridge
index 1e9a459b43..15398e08fc 160000
--- a/3rdparty/Megatron-Bridge-workspace/Megatron-Bridge
+++ b/3rdparty/Megatron-Bridge-workspace/Megatron-Bridge
@@ -1 +1 @@
-Subproject commit 1e9a459b43aa1f62ca1356e554d2b0196ebdd546
+Subproject commit 15398e08fc86be3de084c7382116527246ab1852
diff --git a/3rdparty/Megatron-Bridge-workspace/setup.py b/3rdparty/Megatron-Bridge-workspace/setup.py
index 9aec2e6481..a0beea9449 100644
--- a/3rdparty/Megatron-Bridge-workspace/setup.py
+++ b/3rdparty/Megatron-Bridge-workspace/setup.py
@@ -26,8 +26,9 @@
 bridge_package_name = "megatron.bridge"
 
 CACHED_DEPENDENCIES = [
-    "transformers>=4.57.1",
+    "transformers<5.0.0",
     "datasets",
+    "accelerate",
     "omegaconf>=2.3.0",
     "tensorboard>=2.19.0",
     "typing-extensions",
@@ -40,7 +41,7 @@
     "hydra-core>1.3,<=1.3.2",
     "megatron-core[dev,mlm]>=0.15.0a0,<0.17.0",
     "qwen-vl-utils",
-    "transformer-engine[pytorch]>=2.9.0a0,<2.10.0",
+    "transformer-engine[pytorch]>=2.10.0a0,<2.12.0",
     "mamba-ssm",
     "nvidia-resiliency-ext",
     "causal-conv1d",
diff --git a/3rdparty/Megatron-LM-workspace/Megatron-LM b/3rdparty/Megatron-LM-workspace/Megatron-LM
index b73ae5cdab..193463c4f8 160000
--- a/3rdparty/Megatron-LM-workspace/Megatron-LM
+++ b/3rdparty/Megatron-LM-workspace/Megatron-LM
@@ -1 +1 @@
-Subproject commit b73ae5cdab9d409fcface2b2f3c375710abe6911
+Subproject commit 193463c4f8414e6906a40dd527a450bca50706b1
diff --git a/3rdparty/Megatron-LM-workspace/setup.py b/3rdparty/Megatron-LM-workspace/setup.py
index 0a088b393e..fb0a7cf92e 100644
--- a/3rdparty/Megatron-LM-workspace/setup.py
+++ b/3rdparty/Megatron-LM-workspace/setup.py
@@ -44,30 +44,31 @@
 CACHED_DEPENDENCIES = [
     # Default dependencies from pyproject.toml
     "torch",
-    "numpy<2.0.0",
+    "numpy",
     "packaging>=24.2",
     # Dev dependencies from pyproject.toml
-    "nvidia-modelopt[torch]>=0.33.0a0,<0.34.0; sys_platform != 'darwin'",
-    "transformer-engine[pytorch]>=2.9.0a0,<2.10.0",
-    "nvidia-resiliency-ext>=0.4.0a0,<0.5.0",
+    "nvidia-modelopt[torch]; sys_platform != 'darwin'",
+    "transformer-engine[pytorch,core_cu13]>=2.9.0a0,<2.12.0",
+    "nvidia-resiliency-ext",
     "tqdm",
     "einops~=0.8",
     "tensorstore~=0.1,!=0.1.46,!=0.1.72",
     "nvtx~=0.2",
     "multi-storage-client~=0.27",
     "opentelemetry-api~=1.33.1",
-    "setuptools<80.0.0",
     "mamba-ssm~=2.2",
     "causal-conv1d~=1.5",
+    "flash-linear-attention~=0.3.2",
     "nv-grouped-gemm~=1.1",
     "megatron-energon[av_decode]~=6.0",
-    "av<16.0.0",
-    "flashinfer-python",
+    "av",
+    "flashinfer-python~=0.5.0",
     "wget",
     "onnxscript",
-    "flash-linear-attention~=0.3.2",
     # VCS dependency - must match pyproject.toml [tool.uv.sources]
     "emerging_optimizers @ git+https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git@v0.1.0",
+    "datasets",
+    "fastapi~=0.50",
 ]
 
 
diff --git a/README.md b/README.md
index a933709256..a23574ffb7 100644
--- a/README.md
+++ b/README.md
@@ -286,6 +286,10 @@ sbatch \
     --gres=gpu:8 \
     ray.sub
 ```
+
+> [!NOTE]
+> For GB200 systems with 4 GPUs per node, use `--gres=gpu:4` instead.
+
 The required `CONTAINER` can be built by following the instructions in the [Docker documentation](docs/docker.md).
 
 #### GRPO Qwen2.5-32B
@@ -313,6 +317,9 @@ sbatch \
     ray.sub
 ```
 
+> [!NOTE]
+> For GB200 systems with 4 GPUs per node, use `--gres=gpu:4` instead.
+
 #### GRPO Multi-Turn
 
 We also support multi-turn generation and training (tool use, games, etc.).
@@ -361,6 +368,9 @@ sbatch \
     ray.sub
 ```
 
+> [!NOTE]
+> For GB200 systems with 4 GPUs per node, use `--gres=gpu:4` instead.
+
 ## Supervised Fine-Tuning (SFT)
 
 We provide example SFT experiments using various datasets including [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/), OpenAI format datasets (with tool calling support), and custom JSONL datasets. For detailed documentation on supported datasets and configurations, see the [SFT documentation](docs/guides/sft.md).
@@ -406,6 +416,9 @@ sbatch \
     ray.sub
 ```
 
+> [!NOTE]
+> For GB200 systems with 4 GPUs per node, use `--gres=gpu:4` instead.
+
 ## DPO
 
 We provide a sample DPO experiment that uses the [HelpSteer3 dataset](https://huggingface.co/datasets/nvidia/HelpSteer3) for preference-based training.
@@ -464,6 +477,9 @@ sbatch \
     ray.sub
 ```
 
+> [!NOTE]
+> For GB200 systems with 4 GPUs per node, use `--gres=gpu:4` instead.
+
 ## RM
 
 We provide a sample RM experiment that uses the [HelpSteer3 dataset](https://huggingface.co/datasets/nvidia/HelpSteer3) for preference-based training.
@@ -508,6 +524,9 @@ sbatch \
     ray.sub
 ```
 
+> [!NOTE]
+> For GB200 systems with 4 GPUs per node, use `--gres=gpu:4` instead.
+
 ## Evaluation
 
 We provide evaluation tools to assess model capabilities.
@@ -590,7 +609,7 @@ For detailed instructions on how to set up and launch NeMo RL on Slurm or Kubern
 
 - Large amounts of memory fragmentation might occur when running models without support for FlashAttention2.
   If OOM occurs after a few iterations of training, it may help to tweak the allocator settings to reduce memory fragmentation.
-  To do so, specify [`max_split_size_mb`](https://docs.pytorch.org/docs/stable/notes/cuda.html#optimizing-memory-usage-with-pytorch-cuda-alloc-conf)
+  To do so, specify [`max_split_size_mb`](https://docs.pytorch.org/docs/stable/notes/cuda.html#optimizing-memory-usage-with-pytorch-alloc-conf)
   at **either** one of the following places:
   1. Launch training with:
   ```sh
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 62b8796dff..3eb864ecd6 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -45,6 +45,18 @@ apt-get clean
 rm -rf /var/lib/apt/lists/*
 EOF
 
+# CMake (for sglang build)
+RUN GITHUB_ARTIFACTORY=github.com \
+    && CMAKE_VERSION=3.31.1 \
+    && ARCH=$(uname -m) \
+    && CMAKE_INSTALLER="cmake-${CMAKE_VERSION}-linux-${ARCH}" \
+    && curl --retry 3 --retry-delay 2 -fsSL -o "${CMAKE_INSTALLER}.tar.gz" \
+        "https://${GITHUB_ARTIFACTORY}/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_INSTALLER}.tar.gz" \
+    && tar -xzf "${CMAKE_INSTALLER}.tar.gz" \
+    && cp -r "${CMAKE_INSTALLER}/bin/"* /usr/local/bin/ \
+    && cp -r "${CMAKE_INSTALLER}/share/"* /usr/local/share/ \
+    && rm -rf "${CMAKE_INSTALLER}" "${CMAKE_INSTALLER}.tar.gz"
+
 # Install uv and python
 ARG UV_VERSION=0.9.7
 ARG PYTHON_VERSION=3.12
@@ -102,6 +114,7 @@ fi
 # The venv is symlinked to avoid bloating the layer size
 uv sync --link-mode symlink --locked --no-install-project
 uv sync --link-mode symlink --locked --extra vllm --no-install-project
+uv sync --link-mode symlink --locked --extra sglang --no-install-project
 uv sync --link-mode symlink --locked --extra mcore --no-install-project
 uv sync --link-mode symlink --locked --extra automodel --no-install-project
 uv sync --link-mode symlink --locked --all-groups --no-install-project
diff --git a/docs/Makefile b/docs/Makefile
index ae85733231..3a49020b1a 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -1,19 +1,9 @@
-# Makefile for building documentation with isolated UV environment
+# Makefile for building documentation with UV
 
 .DEFAULT_GOAL := help
 
-# Cross-platform venv paths
-ifeq ($(OS),Windows_NT)
-    VENV_DIR = .venv/Scripts
-    PYTHON = $(VENV_DIR)/python.exe
-    ACTIVATE_CMD = .venv\Scripts\activate
-    RM = if exist _build rmdir /s /q _build
-else
-    VENV_DIR = .venv/bin
-    PYTHON = $(VENV_DIR)/python
-    ACTIVATE_CMD = source .venv/bin/activate
-    RM = rm -rf _build
-endif
+# UV run command with docs dependencies
+UV_RUN = uv run --project ../pyproject.toml --group docs
 
 # ------------------------------
 # Help
@@ -29,55 +19,41 @@ help:
 	@echo "  make docs-publish   Build docs (fail on warnings)"
 	@echo "  make docs-clean     Clean built documentation"
 	@echo ""
-	@echo "The environment is automatically set up on first run."
-	@echo "To manually activate the docs environment, run:"
-	@echo "  $(ACTIVATE_CMD)"
-	@echo ""
 
 # ------------------------------
-# Ensure UV and isolated docs environment
+# Check UV is installed
 # ------------------------------
-ensure-docs-env:
+check-uv:
 	@command -v uv >/dev/null 2>&1 || ( \
 		echo ""; \
 		echo "❌ uv is not installed. See https://docs.astral.sh/uv/getting-started/installation/"; \
 		exit 1 \
 	)
-	@if [ ! -x "$(PYTHON)" ]; then \
-		echo "📦 Creating isolated docs environment..."; \
-		uv venv .venv; \
-		uv sync --project ../pyproject.toml --group docs; \
-		echo "✅ Docs environment ready."; \
-		echo "📝 To activate it: $(ACTIVATE_CMD)"; \
-	fi
 
 # ------------------------------
 # Build HTML docs
 # ------------------------------
-docs-html: ensure-docs-env
+docs-html: check-uv
 	@echo "Building HTML documentation..."
-	$(PYTHON) -m sphinx -b html . _build/html
+	$(UV_RUN) sphinx-build -b html . _build/html
 
 # ------------------------------
 # Build docs for publication (fail on warnings)
 # ------------------------------
-docs-publish: ensure-docs-env
+docs-publish: check-uv
 	@echo "Building HTML documentation (fail on warnings)..."
-	$(PYTHON) -m sphinx --fail-on-warning -b html . _build/html
+	$(UV_RUN) sphinx-build --fail-on-warning -b html . _build/html
 
 # ------------------------------
 # Start live-reload server
 # ------------------------------
-docs-live: ensure-docs-env
+docs-live: check-uv
 	@echo "Starting live-reload server..."
-	$(PYTHON) -m sphinx_autobuild . _build/html --port 8001
-	@echo ""
-	@echo "📝 To manually activate the docs environment in a shell:"
-	@echo "  $(ACTIVATE_CMD)"
+	$(UV_RUN) sphinx-autobuild . _build/html --port 8001
 
 # ------------------------------
 # Clean built docs
 # ------------------------------
 docs-clean:
 	@echo "Cleaning built documentation..."
-	$(RM)
+	rm -rf _build
diff --git a/docs/about/algorithms/dapo.md b/docs/about/algorithms/dapo.md
index 2a3b4a1aec..5317aa06e1 100644
--- a/docs/about/algorithms/dapo.md
+++ b/docs/about/algorithms/dapo.md
@@ -64,6 +64,9 @@ sbatch \
     ray.sub
 ```
 
+> [!NOTE]
+> For GB200 systems with 4 GPUs per node, use `--gres=gpu:4` instead.
+
 ## Configuration
 
 DAPO uses the same configuration structure as GRPO. The key parameters are:
diff --git a/docs/about/algorithms/dpo.md b/docs/about/algorithms/dpo.md
index 474170ceeb..ba0861a4e0 100644
--- a/docs/about/algorithms/dpo.md
+++ b/docs/about/algorithms/dpo.md
@@ -56,3 +56,5 @@ sbatch \
     ray.sub
 ```
 
+> [!NOTE]
+> For GB200 systems with 4 GPUs per node, use `--gres=gpu:4` instead.
diff --git a/docs/about/algorithms/grpo.md b/docs/about/algorithms/grpo.md
index ad79b944ac..c6a86236c6 100644
--- a/docs/about/algorithms/grpo.md
+++ b/docs/about/algorithms/grpo.md
@@ -64,6 +64,9 @@ sbatch \
     ray.sub
 ```
 
+> [!NOTE]
+> For GB200 systems with 4 GPUs per node, use `--gres=gpu:4` instead.
+
 The required `CONTAINER` can be built by following the instructions in the [Docker documentation](../../docker.md).
 
 ## GRPO Qwen2.5-32B
@@ -92,6 +95,9 @@ sbatch \
     ray.sub
 ```
 
+> [!NOTE]
+> For GB200 systems with 4 GPUs per node, use `--gres=gpu:4` instead.
+
 ## GRPO Multi-Turn
 
 We also support multi-turn generation and training (tool use, games, etc.). Reference example for training to play a Sliding Puzzle Game:
diff --git a/docs/about/algorithms/on-policy-distillation.md b/docs/about/algorithms/on-policy-distillation.md
index 7c44945de9..fd954d3496 100644
--- a/docs/about/algorithms/on-policy-distillation.md
+++ b/docs/about/algorithms/on-policy-distillation.md
@@ -41,3 +41,5 @@ sbatch \
     ray.sub
 ```
 
+> [!NOTE]
+> For GB200 systems with 4 GPUs per node, use `--gres=gpu:4` instead.
diff --git a/docs/about/algorithms/rm.md b/docs/about/algorithms/rm.md
index 741651567b..9e5fb268f0 100644
--- a/docs/about/algorithms/rm.md
+++ b/docs/about/algorithms/rm.md
@@ -42,3 +42,5 @@ sbatch \
     ray.sub
 ```
 
+> [!NOTE]
+> For GB200 systems with 4 GPUs per node, use `--gres=gpu:4` instead.
diff --git a/docs/about/algorithms/sft.md b/docs/about/algorithms/sft.md
index c36824deaa..a0dc0526b5 100644
--- a/docs/about/algorithms/sft.md
+++ b/docs/about/algorithms/sft.md
@@ -43,3 +43,5 @@ sbatch \
     ray.sub
 ```
 
+> [!NOTE]
+> For GB200 systems with 4 GPUs per node, use `--gres=gpu:4` instead.
diff --git a/docs/about/model-support.md b/docs/about/model-support.md
new file mode 100644
index 0000000000..eb425ae75c
--- /dev/null
+++ b/docs/about/model-support.md
@@ -0,0 +1,30 @@
+# Model Support
+
+## Broad coverage for 🤗Hugging Face models via [NeMo AutoModel](https://github.com/NVIDIA-NeMo/Automodel)
+
+NeMo RL supports 🤗Hugging Face models from the following classes:
+- LLMs ([AutoModelForCausalLM](https://huggingface.co/docs/transformers/en/model_doc/auto#transformers.AutoModelForCausalLM))
+- VLMs ([AutoModelForImageTextToText](https://huggingface.co/docs/transformers/en/model_doc/auto#transformers.AutoModelForImageTextToText))
+
+for model sizes under 70B at up to 32k sequence length.
+
+## Optimal acceleration for top models via [NeMo Megatron-Bridge](https://github.com/NVIDIA-NeMo/Megatron-Bridge)
+
+[NeMo Megatron-Bridge](https://github.com/NVIDIA-NeMo/Megatron-Bridge) provides acceleration [recipes](https://github.com/NVIDIA-NeMo/RL/tree/main/examples/configs/recipes) for the models below. Users can also leverage the online checkpoint conversion (i.e., the "bridge") by directly inputting a 🤗Hugging Face checkpoint.
+
+**LLMs**:
+
+- **Qwen**: Qwen2.5-1.5B/7B/32B, Qwen3-1.5B/8B/32B, Qwen3-30B-A3B, Qwen3-235B-A22B
+- **Llama**: Llama 3.1/3.3-8B, Llama 3.1/3.3-70B, Llama 3.2-1B
+- **DeepSeek**: DeepSeek-V3/R1-671B
+- **Mistral**: Mistral-NeMo-12B
+- **Moonlight-16B-A3B**
+- **Gemma**: Gemma-3-1B/27B
+- **GPT-OSS**: GPT-OSS-20B/120B
+- **Nemotron**: Llama-Nemotron-Super-49B, Nemotron-Nano-v2-12B, Nemotron-Nano-v3-30B-A3B
+
+**VLMs**:
+
+- **Qwen**: Qwen2.5VL-3B
+
+In addition, please refer to our [performance page](https://docs.nvidia.com/nemo/rl/latest/about/performance-summary.html) for benchmarks and full reproducible yaml recipe configs.
diff --git a/docs/about/performance-summary.md b/docs/about/performance-summary.md
index da333d5408..770513cbef 100644
--- a/docs/about/performance-summary.md
+++ b/docs/about/performance-summary.md
@@ -3,7 +3,7 @@
 
 As part of the NVIDIA NeMo Framework, NeMo RL, provides optimal performance for reinforcement learning on generative AI models by incorporating the latest optimizations - such as refit optimizations, mixed-precision training, and off-policy training.
 
-This page provides performance benchmarks for LLMs and VLMs using NeMo RL across different GPU systems and configurations.
+This page provides performance benchmarks for LLMs and VLMs using NeMo RL across different GPU systems and configurations. The YAML recipes to reproduce these runs can be found under [`examples/configs/recipes/llm/performance`](https://github.com/NVIDIA-NeMo/RL/tree/r0.5.0/examples/configs/recipes/llm/performance).
 
 ## Nomenclature
 
@@ -43,28 +43,59 @@ The performance data includes:
 
 ---
 
-## Nemo RL v0.4
+## NeMo RL v0.5
 
-* GRPO Dataset: [OpenMathInstruct-2](https://huggingface.co/datasets/nvidia/OpenMathInstruct-2)
+### H100 BF16 Benchmarks
+* GRPO Dataset: [OpenMathInstruct-2](https://huggingface.co/datasets/nvidia/OpenMathInstruct-2); DAPO Dataset: [DAPO-Math-17k](https://huggingface.co/datasets/BytedTsinghua-SIA/DAPO-Math-17k)
 * System: DGX-H100
 * Precision: Training BF16, Generation BF16
 * Training Backend: Megatron-core.
 
-| Model     |On/Off policy|T-Max Sequence Length|G-Average Seq len|#-GPUs|G-GBS|T-GBS|Generation [TP,PP]|Training [TP,CP,EP,PP,VPP]|Tokens / sec / GPU|Total Step time(s)|
-|-------    |--------     |-----                |-----            |------|---- |---- |----              |----                      |---               |---|
-|LLAMA3.1_8B|On policy    |4,096                |1,060            |16    |2,048|512  |[1,1]             |[1,1,1,1,1,2,n/a]         |1,562             | 97.7|
-|LLAMA3.1_8B|1-step Off   |4,096                |1,129            |16    |2,048|512  |[1,1]             |[1,1,1,1,1,2,n/a]         |2,161             | 74.6|
-|DeepSeek V3|On policy    |1,536                |745              |256   |512  |512  |[32,1]            |[1,1,16,16,n/a]           |11                | 154|
-|DeepSeek V3|1-step Off   |1,536                |744              |512   |512  |512  |[32,1]            |[1,1,16,16,n/a]           |11.0              | 77.9|
-|Qwen3-235B |On policy    |8,192                |5,671            |128   |512  |512  |[16,1]            |[2,2,16,8,n/a]            |45.7              | 506|
-|Qwen3-235B |1-step Off   |8,192                |5,691            |256   |512  |512  |[8,1]             |[4,1,16,8,n/a]            |52.2              | 241|
-|Qwen3-30B3A|On policy    |4,096                |3,154            |32    |2,048|512  |[4,1]             |[2,1,8,1,n/a]             |925               | 225|
-|Qwen3-30B3A|1-step Off   |4,096                |3,158            |32    |2,048|512  |[4,1]             |[2,1,8,1,n/a]             |864               | 244|
-|Qwen3-32B  |On policy    |4,096                |3,206            |32    |2,048|512  |[4,1]             |[4,1,1,4,n/a]             |540               | 393|
-|Qwen3-32B  |1-step Off   |4,096                |3,207            |64    |2,048|512  |[4,1]             |[4,1,1,4,n/a]             |494               | 215|
+| Algorithm | Model     |On/Off policy|T-Max Sequence Length|G-Average Seq len|#-GPUs|G-GBS|T-GBS|Generation [TP,PP]|Training [TP,CP,EP,PP,VPP]|Tokens / sec / GPU|Total Step time(s)|
+|---------  |-------    |--------     |-----                |-----            |------|---- |---- |----              |----                      |---               |---|
+| GRPO      |LLAMA3.1_8B|On policy    |4,096                |1,019            |16    |2,048|512  |[1,1]             |[1,1,1,1,1,2,n/a]         |1,581             | 92.8|
+| GRPO      |LLAMA3.1_8B|1-step Off   |4,096                |1,123            |16    |2,048|512  |[1,1]             |[1,1,1,1,1,1,n/a]         |2,478             | 64.8|
+| GRPO      |DeepSeek V3|On policy    |1,536                |744              |256   |512  |512  |[32,1]            |[1,1,16,16,n/a]           |12.7              | 134|
+| GRPO      |DeepSeek V3|1-step Off   |1,536                |738              |512   |512  |512  |[32,1]            |[1,1,16,16,n/a]           |13.1              | 64.9|
+| DAPO      |DeepSeek V3|On policy    |1,536                |974              |512   |512  |512  |[64,1]            |[8,4,32,8,n/a]            |2.45              | 974|
+| GRPO      |Qwen3-235B |On policy    |8,192                |5,700            |128   |512  |512  |[16,1]            |[2,2,16,8,n/a]            |54.1              | 431|
+| GRPO      |Qwen3-235B |1-step Off   |8,192                |5,707            |256   |512  |512  |[8,1]             |[4,1,16,8,n/a]            |58.7              | 203|
+| GRPO      |Qwen3-30B3A|On policy    |4,096                |3,196            |32    |2,048|512  |[2,1]             |[1,1,8,1,n/a]             |1,066             | 198|
+| GRPO      |Qwen3-30B3A|1-step Off   |4,096                |3,201            |32    |2,048|512  |[2,1]             |[1,1,8,2,n/a]             |1,391             | 154|
+| GRPO      |Qwen3-32B  |On policy    |4,096                |3,251            |32    |2,048|512  |[4,1]             |[4,1,1,4,n/a]             |571               | 376|
+| GRPO      |Qwen3-32B  |1-step Off   |4,096                |3,252            |64    |2,048|512  |[4,1]             |[4,1,1,4,n/a]             |538               | 200|
+
+### H100 FP8 Benchmarks
+* GRPO Dataset: [OpenMathInstruct-2](https://huggingface.co/datasets/nvidia/OpenMathInstruct-2)
+* System: DGX-H100
+* Precision: Generation FP8, Training FP8
+* Training Backend: Megatron-core.
+
+| Algorithm | Model     |On/Off policy|T-Max Sequence Length|G-Average Seq len|#-GPUs|G-GBS|T-GBS|Generation [TP,PP]|Training [TP,CP,EP,PP,VPP]|Tokens / sec / GPU|Total Step time(s)|
+|---------  |-------    |--------     |-----                |-----            |------|---- |---- |----              |----                      |---               |---|
+| GRPO      |LLAMA3.1_8B|1-step Off   |4,096                |1,128            |16    |2,048|512  |[1,1]             |[1,1,1,1,1,1,n/a]         |3,052             | 53.0|
+| GRPO      |DeepSeek V3|1-step Off   |1,536                |761              |512   |512  |512  |[16,1]            |[1,1,16,16,n/a]           |14.1              | 67.6|
+
+### GB200 BF16 Benchmarks
+* GRPO Dataset: [OpenMathInstruct-2](https://huggingface.co/datasets/nvidia/OpenMathInstruct-2)
+* System: GB200-NVL72
+* Precision: Training BF16, Generation BF16
+* Training Backend: Megatron-core.
 
+| Algorithm | Model     |On/Off policy|T-Max Sequence Length|G-Average Seq len|#-GPUs|G-GBS|T-GBS|Generation [TP,PP]|Training [TP,CP,EP,PP,VPP]|Tokens / sec / GPU|Total Step time(s)|
+|---------  |-------    |--------     |-----                |-----            |------|---- |---- |----              |----                      |---               |---|
+| GRPO      |LLAMA3.1_8B|On policy    |4,096                |1,066            |8     |2,048|512  |[1,1]             |[1,1,1,1,1,1,n/a]         |3,359             | 91.0|
+| GRPO      |LLAMA3.1_8B|1-step Off   |4,096                |1,107            |8     |2,048|512  |[1,1]             |[1,1,1,1,1,1,n/a]         |4,463             | 71.1|
+| GRPO      |DeepSeek V3|On policy    |1,536                |996              |128   |512  |512  |[32,1]            |[1,1,16,8,n/a]            |34.3              | 128|
+| GRPO      |DeepSeek V3|1-step Off   |1,536                |994              |256   |512  |512  |[16,1]            |[1,1,16,8,n/a]            |31.7              | 64.5|
+| GRPO      |Qwen3-235B |On policy    |8,192                |5,711            |64    |512  |512  |[8,1]            |[2,2,16,4,n/a]            |140              | 332|
+| GRPO      |Qwen3-235B |1-step Off   |8,192                |5,711            |128   |512  |512  |[8,1]             |[4,1,16,4,n/a]            |87.9              | 268|
+| GRPO      |Qwen3-30B3A|On policy    |4,096                |3,198            |16    |2,048|512  |[1,1]             |[1,1,16,1,n/a]             |1,822               | 232|
+| GRPO      |Qwen3-30B3A|1-step Off   |4,096                |3,204            |32    |2,048|512  |[1,1]             |[1,1,16,1,n/a]             |1,558               | 136|
+| GRPO      |Qwen3-32B  |On policy    |4,096                |3,253            |16    |2,048|512  |[1,1]             |[2,1,1,1,n/a]             |1,127              | 381|
+| GRPO      |Qwen3-32B  |1-step Off   |4,096                |3,258            |32    |2,048|512  |[1,1]             |[2,1,1,1,n/a]             |1,025               | 210|
 
 Note:
 
 * All Mixture-of-expert (MoE) model training uses token drop-less. 
-* The following metrics are extracted from the average of 5 steps: G-Average Seq len, Tokens/sec/gpu, Total Step time(s). Because of the averaging, the numbers in table does not completely match the equation stated in Performance Metrics above but the difference is small.
\ No newline at end of file
+* The following metrics are averaged over 5 steps: G-Average Seq len, Tokens/sec/gpu, Total Step time(s). Because of the averaging, the numbers in the table do not exactly match the equation stated in Performance Metrics above, but the difference is small.
diff --git a/docs/about/tips-and-tricks.md b/docs/about/tips-and-tricks.md
index 60f91a6e0d..2e0ff4abe4 100644
--- a/docs/about/tips-and-tricks.md
+++ b/docs/about/tips-and-tricks.md
@@ -22,7 +22,7 @@ NRL_FORCE_REBUILD_VENVS=true uv run examples/run_grpo.py ...
 
 ## Memory Fragmentation
 
-Large amounts of memory fragmentation might occur when running models without support for FlashAttention2. If OOM occurs after a few iterations of training, it may help to tweak the allocator settings to reduce memory fragmentation. To do so, specify [`max_split_size_mb`](https://docs.pytorch.org/docs/stable/notes/cuda.html#optimizing-memory-usage-with-pytorch-cuda-alloc-conf) at **either** one of the following places:
+Large amounts of memory fragmentation might occur when running models without support for FlashAttention2. If OOM occurs after a few iterations of training, it may help to tweak the allocator settings to reduce memory fragmentation. To do so, specify [`max_split_size_mb`](https://docs.pytorch.org/docs/stable/notes/cuda.html#optimizing-memory-usage-with-pytorch-alloc-conf) at **either** one of the following places:
 
 1. Launch training with:
 
diff --git a/docs/cluster.md b/docs/cluster.md
index 73e2225a1b..8b1d03fba2 100644
--- a/docs/cluster.md
+++ b/docs/cluster.md
@@ -28,6 +28,9 @@ sbatch \
 > [!TIP]
 > Depending on your Slurm cluster configuration, you may or may not need to include the `--gres=gpu:8` option in the `sbatch` command.
 
+> [!NOTE]
+> For GB200 systems with 4 GPUs per node, use `--gres=gpu:4` instead of `--gres=gpu:8`.
+
 Upon successful submission, Slurm will print the `SLURM_JOB_ID`:
 ```text
 Submitted batch job 1980204
@@ -58,6 +61,10 @@ sbatch \
     --gres=gpu:8 \
     ray.sub
 ```
+
+> [!NOTE]
+> For GB200 systems with 4 GPUs per node, use `--gres=gpu:4` instead.
+
 Upon successful submission, Slurm will print the `SLURM_JOB_ID`:
 ```text
 Submitted batch job 1980204
@@ -173,4 +180,395 @@ sbatch ray.sub \
 
 ## Kubernetes
 
-TBD
+This guide outlines the process of migrating NeMo RL training jobs from a Slurm environment to a Kubernetes cluster using Ray orchestration and NVIDIA GPUs.
+
+---
+
+## Prerequisites
+
+Before beginning, ensure the following requirements are met:
+
+* **Cluster Access:** You must have access to the K8s cluster from a client machine via `kubectl`.
+
+> [!IMPORTANT]
+> **Authentication Required**:
+> Simply installing `kubectl` on your local machine is not sufficient. You must work with your **Infrastructure Administrator** to obtain a valid `KUBECONFIG` file (usually placed at `~/.kube/config`) or authentication token. This file contains the endpoint and credentials required to connect your local client to the specific remote GPU cluster.
+> 
+* **Operators:** The cluster must have the [**NVIDIA GPU Operator**](https://github.com/NVIDIA/gpu-operator) (for GPU provisioning) and the [**KubeRay Operator**](https://github.com/ray-project/kuberay) (for Ray Cluster lifecycle management) installed.
+* **Registry Access:** Ability to push/pull Docker images to a registry (e.g., nvcr.io or Docker Hub).
+
+### 1. Test Cluster Access
+Verify your connection and operator status:
+
+```bash
+kubectl get pods -o wide -w
+```
+
+### 2. Build and Push the Docker Container
+We will use the NVIDIA cloud registry (`nvcr.io`) for this guide. From your client machine:
+
+**Login to the Registry**
+```bash
+# Set up Docker and nvcr.io with your NGC_API_KEY
+docker login nvcr.io
+
+# Username: $oauthtoken
+# Password: <NGC_API_KEY>
+```
+
+**Build and Push**
+Clone the NeMo RL repository and build the container.
+
+```bash
+# Clone recursively
+git clone https://github.com/NVIDIA-NeMo/RL --recursive
+cd RL
+
+# If you already cloned without --recursive, update submodules:
+git submodule update --init --recursive
+
+# Set your organization
+export NGC_ORG=<YOUR_NGC_ORG>
+
+# Self-contained build (default: builds from main)
+docker buildx build --target release -f docker/Dockerfile --tag nvcr.io/${NGC_ORG}/nemo-rl:latest --push .
+```
+
+---
+
+## Phase 1: Infrastructure Setup
+
+### 1. Configure Shared Storage (NFS)
+This tutorial uses an NFS-based `ReadWriteMany` volume to ensure the Head node and Worker nodes see the exact same files (code, data, checkpoints). This prevents "File Not Found" errors.
+
+> **Note:** This is a cluster-wide resource. If your admin has already provided an NFS storage class, you only need to create this PVC once.
+
+**File:** `shared-pvc.yaml`
+
+```yaml
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: nemo-shared-workspace
+spec:
+  accessModes:
+    - ReadWriteMany     # Critical: Allows RW access from multiple nodes
+  storageClassName: nfs-client
+  resources:
+    requests:
+      storage: 2Ti      # Adjust based on dataset and model size
+```
+
+**Apply the configuration:**
+```bash
+kubectl apply -f shared-pvc.yaml
+```
+
+### 2. Create Registry Secret
+This secret allows the cluster to pull the private image you built earlier.
+
+```bash
+kubectl create secret docker-registry nvcr-secret \
+  --docker-server=nvcr.io \
+  --docker-username='$oauthtoken' \
+  --docker-password=YOUR_NGC_API_KEY_HERE \
+  --docker-email=admin@example.com
+```
+
+---
+
+## Phase 2: Ray Cluster Configuration
+
+We will create a Ray cluster with **1x Head node** and **1x Worker node** (with 8x GPUs each).
+
+**Key Configuration Notes:**
+* **Networking:** Uses `bond0` to bypass virtual ethernet overhead (check with your admin regarding the correct interface for NCCL).
+* **Memory:** Disables Ray's OOM killer to prevent false positives.
+* **Caching:** Redirects HuggingFace cache to the shared PVC.
+* **Version Match:** The `rayVersion` spec must match the version in `RL/pyproject.toml`. Check this example [version snapshot](https://github.com/NVIDIA-NeMo/RL/blob/b2e4265d4f2424c0467691f2f0f864cdebe1ab0f/pyproject.toml#L25).
+* **Container image:** Replace the image name `nvcr.io/nvidian/nemo-rl:latest` with your actual image, e.g., `nvcr.io/YOUR_NGC_ORG/nemo-rl:latest`.
+
+> [!WARNING]
+> **Check Your Node Capacity & Resource Limits**
+> The resource requests in the manifest below (e.g., `cpu: "128"`, `memory: "1500Gi"`) are configured for high-end H100 nodes. If these numbers exceed your physical node's available capacity, your pods will remain in a **Pending** state indefinitely.
+>
+> Additionally, the shared memory volume is backed by actual node RAM:
+> ```yaml
+> volumes:
+>   - name: dshm
+>     emptyDir:
+>       medium: Memory
+>       sizeLimit: "1000Gi" # Counts against Node RAM
+> ```
+> You must ensure your physical node has enough memory to cover the container `requests` **plus** the `sizeLimit` of this volume. Please adjust these values to match your specific hardware compute shape.
+
+**File:** `nemo-rl-h100.yaml`
+
+```yaml
+apiVersion: ray.io/v1
+kind: RayCluster
+metadata:
+  name: nemo-h100-cluster
+spec:
+  rayVersion: '2.49.2'
+
+  ######################
+  # HEAD NODE (Uniform with Workers)
+  ######################
+  headGroupSpec:
+    rayStartParams:
+      dashboard-host: '0.0.0.0'
+      block: 'true' 
+      num-gpus: "8"
+    template:
+      spec:
+        imagePullSecrets:
+          - name: nvcr-secret
+        
+        hostNetwork: true 
+        dnsPolicy: ClusterFirstWithHostNet
+
+        tolerations:
+          - key: "nvidia.com/gpu"
+            operator: "Exists"
+            effect: "NoSchedule"
+        
+        containers:
+        - name: ray-head
+          image: nvcr.io/nvidian/nemo-rl:latest
+          imagePullPolicy: Always
+          resources:
+            limits:
+              nvidia.com/gpu: 8 
+              cpu: "128"
+              memory: "1500Gi"
+            requests:
+              nvidia.com/gpu: 8
+              cpu: "128"
+              memory: "1500Gi"
+          env:
+            - name: NVIDIA_VISIBLE_DEVICES
+              value: "all"
+             # IMPORTANT: Verify the correct network interface with your cluster admin
+             # Common values: bond0, eth0, ib0 (for InfiniBand)
+             # Run 'ip addr' or 'ifconfig' on a node to identify available interfaces
+            - name: NCCL_SOCKET_IFNAME
+              value: bond0
+            - name: NCCL_SHM_DISABLE
+              value: "0"
+            - name: RAY_memory_monitor_refresh_ms
+              value: "0"
+            - name: HF_HOME
+              value: "/shared/huggingface"
+          volumeMounts:
+            # All code and data now live here
+            - mountPath: /shared
+              name: shared-vol
+            - mountPath: /dev/shm
+              name: dshm
+        volumes:
+          - name: shared-vol
+            persistentVolumeClaim:
+              claimName: nemo-shared-workspace
+          - name: dshm
+            emptyDir:
+              medium: Memory
+              sizeLimit: "1000Gi"
+
+  ##########################
+  # WORKER NODES (H100)
+  ##########################
+  workerGroupSpecs:
+  - replicas: 1
+    minReplicas: 1
+    maxReplicas: 1
+    groupName: gpu-group-h100
+    rayStartParams:
+      block: 'true'
+      num-gpus: "8"
+    template:
+      spec:
+        imagePullSecrets:
+          - name: nvcr-secret
+        
+        hostNetwork: true 
+        dnsPolicy: ClusterFirstWithHostNet
+        
+        affinity:
+          podAntiAffinity:
+            requiredDuringSchedulingIgnoredDuringExecution:
+            - labelSelector:
+                matchExpressions:
+                - key: ray.io/node-type
+                  operator: In
+                  values: ["worker", "head"]
+              topologyKey: "kubernetes.io/hostname"
+
+        containers:
+        - name: ray-worker
+          image: nvcr.io/nvidian/nemo-rl:latest
+          imagePullPolicy: Always
+          resources:
+            limits:
+              nvidia.com/gpu: 8 
+              cpu: "128"
+              memory: "1500Gi"
+            requests:
+              nvidia.com/gpu: 8
+              cpu: "128"
+              memory: "1500Gi"
+          env:
+             # IMPORTANT: Verify the correct network interface with your cluster admin
+             # Common values: bond0, eth0, ib0 (for InfiniBand)
+             # Run 'ip addr' or 'ifconfig' on a node to identify available interfaces
+            - name: NCCL_SOCKET_IFNAME
+              value: bond0
+            - name: NCCL_SHM_DISABLE
+              value: "0"
+            - name: RAY_memory_monitor_refresh_ms
+              value: "0"
+            - name: HF_HOME
+              value: "/shared/huggingface"
+          volumeMounts:
+            - mountPath: /shared
+              name: shared-vol
+            - mountPath: /dev/shm
+              name: dshm
+        
+        tolerations:
+          - key: "nvidia.com/gpu"
+            operator: "Exists"
+            effect: "NoSchedule"
+        volumes:
+          - name: shared-vol
+            persistentVolumeClaim:
+              claimName: nemo-shared-workspace
+          - name: dshm
+            emptyDir:
+              medium: Memory
+              sizeLimit: "1000Gi"
+
+```
+
+**Cluster Management Commands:**
+
+* **Startup:** `kubectl create -f nemo-rl-h100.yaml`
+* **Shutdown:** `kubectl delete -f nemo-rl-h100.yaml`
+
+---
+
+## Phase 3: Run Sample NemoRL Workloads
+
+Once the cluster is running, you can interact with the Ray head node to submit jobs.
+
+### 1. Access the Head Node
+```bash
+kubectl exec -it $(kubectl get pod -l ray.io/node-type=head -o jsonpath='{.items[0].metadata.name}') -- /bin/bash
+```
+
+### 2. Setup Code on Shared Volume
+Inside the pod, clone the code to the shared PVC (`/shared`). This ensures workers can see the code.
+
+```bash
+cd /shared
+git clone https://github.com/NVIDIA-NeMo/RL --recursive
+cd RL
+git submodule update --init --recursive
+```
+
+### 3. Submit a Job
+Move to the code directory, edit your configuration, and run the job.
+
+```bash
+cd /shared/RL
+
+# Edit config (e.g., paths, model config)
+vim examples/configs/grpo_math_1B.yaml 
+
+# Set environment variables
+export HF_TOKEN=...
+export WANDB_API_KEY=...
+
+# Run the job
+uv run examples/run_grpo_math.py \
+  --config examples/configs/grpo_math_1B.yaml
+```
+
+### 4. Configuration Adjustments
+To run across multiple nodes, or to ensure logs/checkpoints persist, update your YAML config file (`examples/configs/grpo_math_1B.yaml`):
+
+**Cluster Size:**
+```yaml
+cluster:
+  gpus_per_node: 8
+  num_nodes: 2
+```
+
+**Logging & Checkpointing:**
+Redirect these to `/shared` so they persist after the pod is deleted.
+
+```yaml
+checkpointing:
+  enabled: true
+  checkpoint_dir: "/shared/results/grpo"
+
+# ...
+
+logger:
+  log_dir: "/shared/logs"  # Base directory for all logs
+  wandb_enabled: true
+  wandb:
+    project: "grpo-dev"
+    name: "grpo-dev-logger"
+```
+
+### 5. Monitoring
+* **Console:** Watch job progress directly in the terminal where you ran `uv run`.
+* **WandB:** If enabled, check the Weights & Biases web interface.
+
+---
+
+## Utility: PVC Busybox Helper
+
+Use a lightweight "busybox" pod to inspect the PVC or copy data in/out without spinning up a heavy GPU node.
+
+**Create the Busybox Pod:**
+
+```bash
+# Variables
+PVC_NAME=nemo-shared-workspace
+MOUNT_PATH=/shared
+
+kubectl create -f - <<EOF
+apiVersion: v1
+kind: Pod
+metadata:
+  name: nemo-workspace-busybox
+spec:
+  containers:
+  - name: busybox
+    image: busybox
+    command: ["sleep", "infinity"]
+    volumeMounts:
+    - name: workspace
+      mountPath: ${MOUNT_PATH}
+  volumes:
+  - name: workspace
+    persistentVolumeClaim:
+      claimName: ${PVC_NAME}
+EOF
+```
+
+**Usage:**
+
+* **Inspect files:**
+    ```bash
+    kubectl exec -it nemo-workspace-busybox -- sh
+    # inside the pod:
+    ls /shared/results/grpo/
+    ```
+
+* **Copy data (Local -> PVC):**
+    ```bash
+    kubectl cp ./my-nemo-code nemo-workspace-busybox:/shared/
+    ```
diff --git a/docs/design-docs/dependency-management.md b/docs/design-docs/dependency-management.md
index 26151f7809..b2d3a21700 100644
--- a/docs/design-docs/dependency-management.md
+++ b/docs/design-docs/dependency-management.md
@@ -161,7 +161,7 @@ The rebuilt container will have all virtual environments pre-cached with your up
 
 ### Option 3: Classic Workflow - Mounting Modified Submodules
 
-For situations where you're **only changing submodules** (like nemo-automodel, Penguin, Megatron-LM, or Megatron-Bridge) but **not changing Python package versions**, you can use a classic mounting approach. This workflow assumes that the non-submodule Python packages in your local checkout match what the container was built with.
+For situations where you're **only changing submodules** (like nemo-automodel, NeMo Gym, Megatron-LM, or Megatron-Bridge) but **not changing Python package versions**, you can use a classic mounting approach. This workflow assumes that the non-submodule Python packages in your local checkout match what the container was built with.
 
 The container's NeMo RL code is located at `/opt/nemo-rl`. By mounting your local `3rdparty/` directory over the container's `/opt/nemo-rl/3rdparty/`, you can swap out submodules without rebuilding environments or containers.
 
@@ -193,7 +193,7 @@ This mounts:
 > [!IMPORTANT]
 > This workflow is **only suitable when**:
 > - Python package versions in `pyproject.toml` and `uv.lock` haven't changed
-> - You're only modifying code within submodules (nemo-automodel, Penguin, Megatron-LM, Megatron-Bridge)
+> - You're only modifying code within submodules (nemo-automodel, NeMo Gym, Megatron-LM, Megatron-Bridge)
 > - The submodule commits/branches are compatible with the installed package versions
 
 If you've changed Python package versions or dependencies outside of submodules, use Option 1 (`NRL_FORCE_REBUILD_VENVS=true`) or Option 2 (rebuild the container) instead.
diff --git a/docs/guides/grpo.md b/docs/guides/grpo.md
index 08a2d5fc19..b27d68f49d 100755
--- a/docs/guides/grpo.md
+++ b/docs/guides/grpo.md
@@ -38,18 +38,43 @@ To support this, we need to know:
 
 #### Dataset
 
-By default, NeMo RL has support for [OpenMathInstruct-2](../../nemo_rl/data/datasets/response_datasets/openmathinstruct2.py) and [DeepScaler](../../nemo_rl/data/datasets/response_datasets/deepscaler.py) datasets. Both of these datasets are downloaded from HuggingFace and preprocessed on-the-fly, so there's no need to provide a path to any datasets on disk.
+By default, NeMo RL has some built-in supported datasets (e.g., [OpenAssistant](../../nemo_rl/data/datasets/response_datasets/oasst.py), [OpenMathInstruct-2](../../nemo_rl/data/datasets/response_datasets/openmathinstruct2.py), [Squad](../../nemo_rl/data/datasets/response_datasets/squad.py), etc.). You can see the full list [here](../../nemo_rl/data/datasets/response_datasets/__init__.py).
+All of these datasets are downloaded from HuggingFace and preprocessed on-the-fly, so there's no need to provide a path to any datasets on disk.
 
 We provide a [ResponseDataset](../../nemo_rl/data/datasets/response_datasets/response_dataset.py) class that is compatible with JSONL-formatted response datasets for loading datasets from local path or Hugging Face. You can use `input_key`, `output_key` to specify which fields in your data correspond to the question and answer respectively. Here's an example configuration:
 ```yaml
 data:
-  dataset_name: ResponseDataset
-  train_data_path: <PathToTrainingDataset>  # e.g., /path/to/local/dataset.jsonl or hf_org/hf_dataset_name (HuggingFace)
-  val_data_path: <PathToValidationDataset>
-  input_key: <QuestionKey>, default is "input"
-  output_key: <AnswerKey>, default is "output"
-  train_split: <TrainSplit>, default is None  # used for HuggingFace datasets
-  val_split: <ValSplit>, default is None  # used for HuggingFace datasets
+  # other data settings, see `examples/configs/grpo_math_1B.yaml` for more details
+  ...
+  # dataset settings
+  train:
+    # this dataset will override input_key and use the default values for other vars
+    data_path: /path/to/local/train_dataset.jsonl  # local file or hf_org/hf_dataset_name (HuggingFace)
+    input_key: question
+    split: train  # used for HuggingFace datasets
+    split_validation_size: 0.05  # use 5% of the training data as validation data
+    seed: 42  # seed for train/validation split when split_validation_size > 0
+  validation:
+    # this dataset will use the default values for other vars except data_path
+    data_path: /path/to/local/val_dataset.jsonl
+  default:
+    # the values below are used as defaults for any setting a dataset doesn't specify
+    dataset_name: ResponseDataset
+    input_key: input
+    output_key: output
+    prompt_file: null
+    system_prompt_file: null
+    processor: "math_hf_data_processor"
+    env_name: "math"
+```
+
+We support using a single dataset for both train and validation by using `split_validation_size` to set the validation ratio.
+[OpenAssistant](../../nemo_rl/data/datasets/response_datasets/oasst.py), [OpenMathInstruct-2](../../nemo_rl/data/datasets/response_datasets/openmathinstruct2.py), [ResponseDataset](../../nemo_rl/data/datasets/response_datasets/response_dataset.py), and [Tulu3SftMixtureDataset](../../nemo_rl/data/datasets/response_datasets/tulu3.py) support this feature.
+To support this feature in your custom datasets or in other built-in datasets, add the following code to the dataset class, as done in [ResponseDataset](../../nemo_rl/data/datasets/response_datasets/response_dataset.py):
+```python
+# `self.val_dataset` is used (not None) only when current dataset is used for both training and validation
+self.val_dataset = None
+self.split_train_validation(split_validation_size, seed)
 ```
 
 #### Common Data Format
@@ -89,31 +114,19 @@ We have an example of this as `math_data_processor` in [processors.py](../../nem
 
 - task_name (unique task identifier):
   - Determines which processor, env, prompts, and dataset to use for this task.
-  - Currently, we support a single dataset and a single environment. Therefore, task_name equals the dataset_name in config (i.e., config.data.dataset_name).
+  - Currently, we support a single dataset and a single environment. Therefore, task_name equals the dataset_name in the config (i.e., config.data.dataset_name).
 - task_spec (TaskDataSpec):
-  - Specifies per-task system prompt and prompt (with defaults applied from a global spec when unspecified).
+  - Specifies per-task system prompt and prompt.
 - task_data_processors:
   - Dict mapping: task_name -> (task_spec, processor_fn).
-  - Typical flow: provide a default mapping using defaultdict, then explicitly register the dataset-provided processor under the resolved task_name.
+- task_to_env:
+  - Dict mapping: task_name -> task_env.
 
 Example (simplified):
 
 ```python
-default_task_spec = TaskDataSpec(
-    task_name="math_default",
-    prompt_file=data_config["prompt_file"],
-    system_prompt_file=data_config["system_prompt_file"],
-)
-
-task_data_processors: dict[str, tuple[TaskDataSpec, TaskDataProcessFnCallable]] = defaultdict(
-    lambda: (default_task_spec, math_hf_data_processor)
-)
-
-# Resolve task_name from dataset or spec
-task_spec = data.task_spec
-task_name = data.task_name
-assert hasattr(data, "processor"), "Dataset must have a processor attribute"
-task_data_processors[task_name] = (task_spec, data.processor)
+task_data_processors = {data.task_name: (data.task_spec, data.processor)}
+task_to_env = {data.task_name: env}
 ```
 
 #### Putting It All Together
@@ -128,50 +141,43 @@ Then, you can set the data up as follows:
 
 ```python
 
-# 1) Select environment from data config
-env_name = data_config["env_name"]
-env = create_env(env_name=env_name, env_configs=env_configs)
+# 1) Setup environments from data config
+env_name_list = extract_necessary_env_names(data_config)
+envs = {
+    env_name: create_env(env_name=env_name, env_config=env_configs[env_name])
+    for env_name in env_name_list
+}
 
-# 2) Build default TaskDataSpec from config (prompts loaded from files if present)
-default_task_spec = TaskDataSpec(
-    task_name="math_default",
-    prompt_file=data_config["prompt_file"],
-    system_prompt_file=data_config["system_prompt_file"],
-)
-
-# 3) Define default processor mapping
-task_data_processors: dict[str, tuple[TaskDataSpec, TaskDataProcessFnCallable]] = defaultdict(
-    lambda: (default_task_spec, math_hf_data_processor)
-)
-
-# 4) Load dataset using the helper (built-ins or local/HF datasets)
-data = load_response_dataset(data_config, seed)
+# 2) Load dataset using the helper (built-ins or local/HF datasets)
+data = load_response_dataset(data_config["train"])
 
-# 5) Resolve task spec/name and ensure dataset provides a processor
-task_spec = data.task_spec
-task_name = data.task_name
-assert hasattr(data, "processor"), "Dataset must have a processor attribute"
-task_data_processors[task_name] = (task_spec, data.processor)
+# 3) Build task mapping
+task_data_processors = {data.task_name: (data.task_spec, data.processor)}
+task_to_env = {data.task_name: envs[data_config["train"]["env_name"]]}
 
-# 6) Construct processed datasets (train and optional validation)
+# 4) Construct processed dataset
 dataset = AllTaskProcessedDataset(
-    data.formatted_ds["train"],
+    data.dataset,
     tokenizer,
-    default_task_spec,
+    None,
     task_data_processors,
     max_seq_length=data_config["max_input_seq_length"],
 )
-val_dataset = (
-    AllTaskProcessedDataset(
-        data.formatted_ds["validation"],
+
+# 5) Do the same thing for validation dataset if it exists
+if "validation" in data_config and data_config["validation"] is not None:
+    val_data = load_response_dataset(data_config["validation"])
+
+    val_task_data_processors = {val_data.task_name: (val_data.task_spec, val_data.processor)}
+    val_task_to_env = {val_data.task_name: envs[data_config["validation"]["env_name"]]}
+
+    val_dataset = AllTaskProcessedDataset(
+        val_data.dataset,
         tokenizer,
-        default_task_spec,
-        task_data_processors,
+        None,
+        val_task_data_processors,
         max_seq_length=data_config["max_input_seq_length"],
     )
-    if data.formatted_ds["validation"]
-    else None
-)
 ```
 
 Ensure you provide a mapping of tasks to their processors so the dataset knows which processor to use when handling samples.
@@ -185,7 +191,7 @@ For more information about environments, see the [Environments Guide](environmen
 ### Env–Task Mapping
 
 - env:
-  - The environment actor for reward/evaluation, constructed using `create_env(env_name=..., env_configs=...)`.
+  - The environment actor for reward/evaluation, constructed using `create_env(env_name=..., env_config=...)`.
   - The environment to use is declared under the data section of the config (e.g., `data.env_name` states which env the dataset uses).
 - task_to_env:
   - Dict mapping: task_name -> env. In the current single-task setup this typically points all tasks to the same env, but this structure enables different envs per task in future multi-task scenarios.
@@ -193,11 +199,13 @@ For more information about environments, see the [Environments Guide](environmen
 Example (simplified):
 
 ```python
-env_name = data_config["env_name"]  # declared under config.data
-env = create_env(env_name=env_name, env_configs=env_configs)
+env_name_list = extract_necessary_env_names(data_config)
+envs = {
+    env_name: create_env(env_name=env_name, env_config=env_configs[env_name])
+    for env_name in env_name_list
+}
 
-task_to_env: dict[str, EnvironmentInterface] = defaultdict(lambda: env)
-task_to_env[task_name] = env
+task_to_env[task_name] = envs[data_config["train"]["env_name"]]
 val_task_to_env = task_to_env  # validation usually mirrors training mapping
 ```
 
@@ -335,7 +343,7 @@ $$
 \text{token-mult-prob-error} = \frac{1}{n}\sum_{i=1}^{n\text{(tokens)}}\exp\left(\left\|\text{log-train-fwk}_i - \text{logprobs-inference-fwk}_i\right\|\right)
 $$
 
-Intuitively, this measures the average multiplicative probability error for sampled tokens, where samples are drawn as $x \sim \pi_{\text{inference-framework}}$. The purpose of this is to highlight any obvious sampling errors or discrepencies between the inference backend and training framework. If it trends upward steeply over the course of training past $\sim 1-2\%$, there is usually a problem with how your weights are being updated. If very spiky, it can indicate a bug in the inference framework or buggy weight refitting.
+Intuitively, this measures the average multiplicative probability error for sampled tokens, where samples are drawn as $x \sim \pi_{\text{inference-framework}}$. The purpose of this is to highlight any obvious sampling errors or discrepancies between the inference backend and training framework. If it trends upward steeply over the course of training past $\sim 1-2\%$, there is usually a problem with how your weights are being updated. If these metrics are very spiky, they can indicate a bug in the inference framework or buggy weight refitting.
 
 ### KL Divergence Error
 This feature is controlled by the following metrics:
@@ -346,7 +354,7 @@ This feature is controlled by the following metrics:
 * `js_divergence_error` or (Jensen–Shannon divergence): $(D_{\text{KL}}(P_{policy} || P_{m}) + D_{\text{KL}}(P_{gen} || P_{m})) / 2$, where $P_{m} = (P_{policy} + P_{gen}) / 2$
   - uses the mean mixture distribution as reference
 
-According to the paper [When Speed Kills Stability: Demystifying RL Collapse from the Training-Inference Mismatch](https://yingru.notion.site/When-Speed-Kills-Stability-Demystifying-RL-Collapse-from-the-Training-Inference-Mismatch-271211a558b7808d8b12d403fd15edda), `gen_kl_error` was introduced (referred to as `vllm-kl` in the paper) as the key metric to measure mismatch between policy and generation distribution. Empirically, the mismatch is approximately 1e-3, and the divergence is larger for low-probability tokens as predicted by the generation inference engine (like vLLM).
+According to the paper [When Speed Kills Stability: Demystifying RL Collapse from the Training-Inference Mismatch](https://yingru.notion.site/When-Speed-Kills-Stability-Demystifying-RL-Collapse-from-the-Training-Inference-Mismatch-271211a558b7808d8b12d403fd15edda), `gen_kl_error` was introduced (referred to as `vllm-kl` in the paper) as the key metric to measure the mismatch between the policy and generation distributions. Empirically, the mismatch is approximately 1e-3, and the divergence is larger for low-probability tokens as predicted by the generation inference engine (like vLLM).
 
 The three divergence metrics provide complementary perspectives on distribution mismatch. For example:
 
@@ -371,7 +379,7 @@ This feature is controlled by the parameter `sampling_importance_ratio`. It adju
 
 This is simply $\frac{1}{|T|}\sum_{t \in \text{tokens}}\text{exp}(\text{log}(\pi_{\text{training}}(t)) - \text{log}(\pi_{\text{inference}}(t)))$
 
-Similar to [Multiplicative Token Probability Error](#multiplicative-token-probability-error), this is a measure of how far off your inference backend is from your training framework. However, this metric is meant to find the bias in that error instead of loosely the variance as it does not take the absolute value of the error. With some noise, this should hover around 1.
+Similar to [Multiplicative Token Probability Error](#multiplicative-token-probability-error), this is a measure of how far off your inference backend is from your training framework. However, this metric is meant to find the bias in that error, rather than the variance, as it does not take the absolute value of the error. With some noise, this should hover around 1.
 
 This metric is always calculated and the per-token version (without the mean) is used in the loss function when [Importance Sampling Correction](#importance-sampling-correction) is enabled.
 
diff --git a/docs/guides/sft.md b/docs/guides/sft.md
index 81e68d5cc3..2d74914ca2 100644
--- a/docs/guides/sft.md
+++ b/docs/guides/sft.md
@@ -37,7 +37,7 @@ SFT datasets in NeMo RL are encapsulated using classes. Each SFT data class is e
 SFT datasets are expected to follow the HuggingFace chat format. Refer to the [chat dataset document](../design-docs/chat-datasets.md) for details. If your data is not in the correct format, simply write a preprocessing script to convert the data into this format. [response_datasets/squad.py](../../nemo_rl/data/datasets/response_datasets/squad.py) has an example:
 
 ```python
-def format_squad(data):
+def format_data(self, data: dict[str, Any]) -> dict[str, Any]:
     return {
         "messages": [
             {
@@ -56,7 +56,7 @@ def format_squad(data):
     }
 ```
 
-NeMo RL SFT uses HuggingFace chat templates to format the individual examples. Three types of chat templates are supported, which can be configured via `tokenizer.chat_template` in your yaml config (see [sft.yaml](../../examples/configs/sft.yaml) for an example):
+NeMo RL SFT uses Hugging Face chat templates to format the individual examples. Three types of chat templates are supported, which can be configured using `tokenizer.chat_template` in your YAML config (see [sft.yaml](../../examples/configs/sft.yaml) for an example):
 
 1. Apply the tokenizer's default chat template. To use the tokenizer's default, either omit `tokenizer.chat_template` from the config altogether, or set `tokenizer.chat_template="default"`.
 2. Use a "passthrough" template which simply concatenates all messages. This is desirable if the chat template has been applied to your dataset as an offline preprocessing step. In this case, you should set `tokenizer.chat_template` to None as follows:
@@ -64,25 +64,49 @@ NeMo RL SFT uses HuggingFace chat templates to format the individual examples. T
     tokenizer:
       chat_template: NULL
     ```
-3. Use a custom template: If you would like to use a custom template, create a string template in [jinja format](https://huggingface.co/docs/transformers/v4.34.0/en/chat_templating#how-do-i-create-a-chat-template), and add that string to the config. For example,
+3. Use a custom template: If you would like to use a custom template, create a string template in [Jinja format](https://huggingface.co/docs/transformers/v4.34.0/en/chat_templating#how-do-i-create-a-chat-template), and add that string to the config. For example,
 
     ```yaml
     tokenizer:
     custom_template: "{% for message in messages %}{%- if message['role'] == 'system'  %}{{'Context: ' + message['content'].strip()}}{%- elif message['role'] == 'user'  %}{{' Question: ' + message['content'].strip() + ' Answer: '}}{%- elif message['role'] == 'assistant'  %}{{message['content'].strip()}}{%- endif %}{% endfor %}"
     ```
 
-By default, NeMo RL has support for [OpenAssistant](../../nemo_rl/data/datasets/response_datasets/oasst.py), [Squad](../../nemo_rl/data/datasets/response_datasets/squad.py) and [OpenMathInstruct-2](../../nemo_rl/data/datasets/response_datasets/openmathinstruct2.py) datasets. All of these datasets are downloaded from HuggingFace and preprocessed on-the-fly, so there's no need to provide a path to any datasets on disk.
+By default, NeMo RL has built-in support for several datasets (e.g., [OpenAssistant](../../nemo_rl/data/datasets/response_datasets/oasst.py), [OpenMathInstruct-2](../../nemo_rl/data/datasets/response_datasets/openmathinstruct2.py), and [Squad](../../nemo_rl/data/datasets/response_datasets/squad.py)); you can see the full list [here](../../nemo_rl/data/datasets/response_datasets/__init__.py).
+All of these datasets are downloaded from Hugging Face and preprocessed on-the-fly, so there's no need to provide a path to any datasets on disk.
 
-We provide a [ResponseDataset](../../nemo_rl/data/datasets/response_datasets/response_dataset.py) class that is compatible with jsonl-formatted response datasets for loading datasets from local path or HuggingFace. You can use `input_key`, `output_key` to specify which fields in your data correspond to the question and answer respectively. Here's an example configuration:
+We provide a [ResponseDataset](../../nemo_rl/data/datasets/response_datasets/response_dataset.py) class that is compatible with JSONL-formatted response datasets for loading datasets from a local path or Hugging Face. You can use `input_key` and `output_key` to specify which fields in your data correspond to the question and answer respectively. Here's an example configuration:
 ```yaml
 data:
-  dataset_name: ResponseDataset
-  train_data_path: <PathToTrainingDataset>  # e.g., /path/to/local/dataset.jsonl or hf_org/hf_dataset_name (HuggingFace)
-  val_data_path: <PathToValidationDataset>
-  input_key: <QuestionKey>, default is "input"
-  output_key: <AnswerKey>, default is "output"
-  train_split: <TrainSplit>, default is None  # used for HuggingFace datasets
-  val_split: <ValSplit>, default is None  # used for HuggingFace datasets
+  # other data settings, see `examples/configs/sft.yaml` for more details
+  ...
+  # dataset settings
+  train:
+    # this dataset will override input_key and use the default values for other vars
+    data_path: /path/to/local/train_dataset.jsonl  # local file or hf_org/hf_dataset_name (Hugging Face)
+    input_key: question
+    split: train  # used for Hugging Face datasets
+    split_validation_size: 0.05  # use 5% of the training data as validation data
+    seed: 42  # seed for train/validation split when split_validation_size > 0
+  validation:
+    # this dataset will use the default values for other vars except data_path
+    data_path: /path/to/local/val_dataset.jsonl
+  default:
+    # the values below are used as defaults when a dataset doesn't specify them
+    dataset_name: ResponseDataset
+    input_key: input
+    output_key: output
+    prompt_file: null
+    system_prompt_file: null
+    processor: "sft_processor"
+```
+
+A single dataset can be used for both training and validation by setting `split_validation_size` to the fraction of the training data to use as validation data.
+This feature is currently supported by [OpenAssistant](../../nemo_rl/data/datasets/response_datasets/oasst.py), [OpenMathInstruct-2](../../nemo_rl/data/datasets/response_datasets/openmathinstruct2.py), [ResponseDataset](../../nemo_rl/data/datasets/response_datasets/response_dataset.py), and [Tulu3SftMixtureDataset](../../nemo_rl/data/datasets/response_datasets/tulu3.py).
+To support this feature in your custom datasets or in other built-in datasets, add the following code to the dataset class, as done in [ResponseDataset](../../nemo_rl/data/datasets/response_datasets/response_dataset.py).
+```python
+# `self.val_dataset` is used (not None) only when current dataset is used for both training and validation
+self.val_dataset = None
+self.split_train_validation(split_validation_size, seed)
 ```
 
 ### OpenAI Format Datasets (with Tool Calling Support)
@@ -95,14 +119,16 @@ To use an OpenAI format dataset, configure your YAML as follows:
 
 ```yaml
 data:
-  dataset_name: openai_format
-  train_data_path: "/path/to/train.jsonl"  # Path to training data
-  val_data_path: "/path/to/val.jsonl"      # Path to validation data
-  chat_key: "messages"                     # Key for messages in the data (default: "messages")
-  system_key: null                         # Key for system message in the data (optional)
-  system_prompt: null                      # Default system prompt if not in data (optional)
-  tool_key: "tools"                        # Key for tools in the data (default: "tools")
-  use_preserving_dataset: false            # Set to true for heterogeneous tool schemas (see below)
+  train:
+    dataset_name: openai_format
+    data_path: <PathToTrainingDataset>       # Path to training data
+    chat_key: "messages"                     # Key for messages in the data (default: "messages")
+    system_key: null                         # Key for system message in the data (optional)
+    system_prompt: null                      # Default system prompt if not in data (optional)
+    tool_key: "tools"                        # Key for tools in the data (default: "tools")
+    use_preserving_dataset: false            # Set to true for heterogeneous tool schemas (see below)
+  validation:
+    ...
 ```
 
 #### Data Format
@@ -169,7 +195,7 @@ NeMo RL supports LoRA (Low-Rank Adaptation) for parameter-efficient fine-tuning.
 
 Notes:
 - LoRA is supported with DTensor v2 and Megatron backends. Uses the DTensor backend by default. DTensor v1 does not support LoRA (ensure `policy.dtensor_cfg._v2=true` when using DTensor).
-- Triton kernels are only used in the DTensor v2 path. For TP > 1, Automodel currently does not support Triton kernels (see note below).
+- Triton kernels are only used in the DTensor v2 path. For `tensor_parallel_size > 1`, Automodel currently does not support Triton kernels (see note below).
 
 ### DTensor Configuration Parameters
 
diff --git a/docs/index.md b/docs/index.md
index d37b4c2098..7d216a9bf3 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -171,6 +171,7 @@ Comprehensive reference for all NeMo RL modules, classes, functions, and methods
 
 about/overview
 about/performance-summary
+about/model-support
 about/features
 about/backends
 about/quick-start
diff --git a/examples/configs/distillation_math.yaml b/examples/configs/distillation_math.yaml
index 62937754f1..078b2bef96 100644
--- a/examples/configs/distillation_math.yaml
+++ b/examples/configs/distillation_math.yaml
@@ -107,6 +107,9 @@ policy: &POLICY_BASE
         bias_activation_fusion: True
         defer_fp32_logits: False
         moe_per_layer_logging: False
+        moe_enable_deepep: false
+        moe_token_dispatcher_type: "allgather"
+        moe_shared_expert_overlap: false
         
         optimizer:
             optimizer: "adam"
@@ -206,11 +209,20 @@ teacher:
 
 data:
     max_input_seq_length: ${policy.max_total_sequence_length} # upper bound, real truncation occurs at vllm.max_model_len
-    prompt_file: "examples/prompts/cot.txt"
-    system_prompt_file: null
-    dataset_name: "DeepScaler"
     shuffle: true
 
+    # dataset
+    train:
+        dataset_name: DeepScaler
+    validation:
+        dataset_name: AIME2024
+        repeat: 16
+    # default settings for all datasets
+    default:
+        prompt_file: "examples/prompts/cot.txt"
+        system_prompt_file: null
+        env_name: "math"
+
 env:
     math:
         num_workers: 8
@@ -225,12 +237,12 @@ logger:
     monitor_gpus: true
     wandb:
         project: "nemo-distillation"
-        name: "distillation-${data.dataset_name}-${teacher.model_name}-${policy.model_name}-${loss_fn.kl_type}-${distillation.topk_logits_k}"
+        name: "distillation-${data.train.dataset_name}-${teacher.model_name}-${policy.model_name}-${loss_fn.kl_type}-${distillation.topk_logits_k}"
     swanlab:
         project: "nemo-distillation"
-        name: "distillation-${data.dataset_name}-${teacher.model_name}-${policy.model_name}-${loss_fn.kl_type}-${distillation.topk_logits_k}"
+        name: "distillation-${data.train.dataset_name}-${teacher.model_name}-${policy.model_name}-${loss_fn.kl_type}-${distillation.topk_logits_k}"
     tensorboard:
-        log_dir: "tb_logs-distillation-${data.dataset_name}"
+        log_dir: "tb_logs-distillation-${data.train.dataset_name}"
     mlflow:
         experiment_name: "distillation-dev"
         run_name: "distillation-math-cl-logger"
diff --git a/examples/configs/distillation_math_megatron.yaml b/examples/configs/distillation_math_megatron.yaml
index 644d240a7b..ae2fbcd3e1 100644
--- a/examples/configs/distillation_math_megatron.yaml
+++ b/examples/configs/distillation_math_megatron.yaml
@@ -59,6 +59,9 @@ policy: &POLICY_BASE
         bias_activation_fusion: True
         moe_per_layer_logging: False
         defer_fp32_logits: False
+        moe_enable_deepep: false
+        moe_token_dispatcher_type: "allgather"
+        moe_shared_expert_overlap: false
         
         optimizer:
             optimizer: "adam"
@@ -147,11 +150,11 @@ logger:
     wandb_enabled: true
     wandb:
         project: "nemo-distillation"
-        name: "distillation-megatron-${data.dataset_name}-${teacher.model_name}-${policy.model_name}-${loss_fn.kl_type}-${distillation.topk_logits_k}"
+        name: "distillation-megatron-${data.train.dataset_name}-${teacher.model_name}-${policy.model_name}-${loss_fn.kl_type}-${distillation.topk_logits_k}"
     tensorboard:
-        log_dir: "tb_logs-distillation-megatron-${data.dataset_name}-${teacher.model_name}-${policy.model_name}-${loss_fn.kl_type}-${distillation.topk_logits_k}"
+        log_dir: "tb_logs-distillation-megatron-${data.train.dataset_name}-${teacher.model_name}-${policy.model_name}-${loss_fn.kl_type}-${distillation.topk_logits_k}"
     mlflow:
-        run_name: "distillation-math-megatron-${data.dataset_name}-${teacher.model_name}-${policy.model_name}-${loss_fn.kl_type}-${distillation.topk_logits_k}"
+        run_name: "distillation-math-megatron-${data.train.dataset_name}-${teacher.model_name}-${policy.model_name}-${loss_fn.kl_type}-${distillation.topk_logits_k}"
 
 cluster:
     gpus_per_node: 8
diff --git a/examples/configs/dpo.yaml b/examples/configs/dpo.yaml
index a5e4f037af..f4b0c14816 100755
--- a/examples/configs/dpo.yaml
+++ b/examples/configs/dpo.yaml
@@ -49,7 +49,7 @@ policy:
 
   dtensor_cfg:
     env_vars:
-      PYTORCH_CUDA_ALLOC_CONF: ""  # Refers to https://docs.pytorch.org/docs/stable/notes/cuda.html#optimizing-memory-usage-with-pytorch-cuda-alloc-conf
+      PYTORCH_CUDA_ALLOC_CONF: ""  # Refers to https://docs.pytorch.org/docs/stable/notes/cuda.html#optimizing-memory-usage-with-pytorch-alloc-conf
     enabled: true
     cpu_offload: False
     sequence_parallel: false
@@ -119,6 +119,9 @@ policy:
     bias_activation_fusion: True
     defer_fp32_logits: False
     moe_per_layer_logging: False
+    moe_enable_deepep: false
+    moe_token_dispatcher_type: "allgather"
+    moe_shared_expert_overlap: false
     
     optimizer:
       optimizer: "adam"
diff --git a/examples/configs/grpo_math_1B.yaml b/examples/configs/grpo_math_1B.yaml
index ec8c2c5ecc..a7a9fe86b6 100644
--- a/examples/configs/grpo_math_1B.yaml
+++ b/examples/configs/grpo_math_1B.yaml
@@ -116,6 +116,9 @@ policy:
     bias_activation_fusion: True
     defer_fp32_logits: False
     moe_per_layer_logging: False
+    moe_enable_deepep: false
+    moe_token_dispatcher_type: "allgather"
+    moe_shared_expert_overlap: false
 
     optimizer:
       optimizer: "adam"
@@ -255,22 +258,38 @@ policy:
 
 data:
   max_input_seq_length: ${policy.max_total_sequence_length} # upper bound, real truncation occurs at vllm.max_model_len
-  prompt_file: "examples/prompts/cot.txt"
-  system_prompt_file: null
   shuffle: true
   num_workers: 1
-  processor: "math_hf_data_processor"
-  env_name: "math"
-  dataset_name: "OpenMathInstruct-2"
+
+  # dataset
+  train:
+    dataset_name: OpenMathInstruct-2
+    split_validation_size: 0.05 # use 5% of the training data as validation data
+    seed: ${grpo.seed} # seed for train/validation split when split_validation_size > 0
+  validation: null
+  # default settings for all datasets
+  default:
+    prompt_file: "examples/prompts/cot.txt"
+    system_prompt_file: null
+    processor: "math_hf_data_processor"
+    env_name: "math"
   # You can use custom response datasets for training and validation. For example:
-  #   data:
-  #     dataset_name: ResponseDataset
-  #     train_data_path: <PathToTrainingDataset>  # e.g., /path/to/local/dataset.jsonl or hf_org/hf_dataset_name (HuggingFace)
-  #     val_data_path: <PathToValidationDataset>
-  #     input_key: <QuestionKey>, default is "input"
-  #     output_key: <AnswerKey>, default is "output"
-  #     train_split: <TrainSplit>, default is None  # used for HuggingFace datasets
-  #     val_split: <ValSplit>, default is None  # used for HuggingFace datasets
+  # train:
+  #   # this dataset will override input_key and use the default values for other vars
+  #   data_path: /path/to/local/train_dataset.jsonl
+  #   input_key: question
+  # validation:
+  #   # this dataset will use the default values for other vars except data_path
+  #   data_path: /path/to/local/val_dataset.jsonl
+  # default:
+  #   # the values below are used as defaults when a dataset doesn't specify them
+  #   dataset_name: ResponseDataset
+  #   input_key: input
+  #   output_key: output
+  #   prompt_file: null
+  #   system_prompt_file: null
+  #   processor: "math_hf_data_processor"
+  #   env_name: math
   # See https://github.com/NVIDIA-NeMo/RL/blob/main/docs/guides/grpo.md#datasets for more details.
 
 env:
diff --git a/examples/configs/grpo_math_1B_megatron.yaml b/examples/configs/grpo_math_1B_megatron.yaml
index 1a14b8ce64..bb51d50942 100644
--- a/examples/configs/grpo_math_1B_megatron.yaml
+++ b/examples/configs/grpo_math_1B_megatron.yaml
@@ -94,6 +94,9 @@ policy:
     moe_router_load_balancing_type: "none" # "seq_aux_loss" causes logprob error divergence for grpo
     moe_router_bias_update_rate: 0.0 # by default, disable bias updates for grpo
     moe_permute_fusion: false
+    moe_enable_deepep: false
+    moe_token_dispatcher_type: "allgather"
+    moe_shared_expert_overlap: false
     #gives ~20% training perf speedup with sequence packing 
     apply_rope_fusion: True
     
@@ -157,13 +160,6 @@ policy:
       gpu_memory_utilization: 0.6
       max_model_len: ${policy.max_total_sequence_length}
 
-data:
-  max_input_seq_length: ${policy.max_total_sequence_length} # upper bound, real truncation occurs at vllm.max_model_len
-  prompt_file: "examples/prompts/cot.txt"
-  system_prompt_file: null
-  dataset_name: "OpenMathInstruct-2"
-  shuffle: true
-
 env:
   math:
     num_workers: 8
diff --git a/examples/configs/grpo_math_1B_sglang.yaml b/examples/configs/grpo_math_1B_sglang.yaml
new file mode 100644
index 0000000000..17b30f3ef5
--- /dev/null
+++ b/examples/configs/grpo_math_1B_sglang.yaml
@@ -0,0 +1,25 @@
+defaults: grpo_math_1B.yaml
+
+grpo:
+  val_batch_size: 128
+
+policy:
+  generation:
+    backend: "sglang"
+    sglang_cfg:
+      # SGLang specific configuration
+      model_path: ${policy.model_name} 
+      gpus_per_server: 1 
+      dtype: ${policy.precision}
+      context_length: 512  # Maximum context length
+      allow_auto_truncate: true
+      enable_memory_saver: false
+      dp_size: 1
+      pp_size: 1
+      ep_size: 1
+      max_running_requests: null
+      mem_fraction_static: 0.7
+      skip_server_warmup: true
+
+logger:
+  wandb_enabled: true
diff --git a/examples/configs/grpo_rm_1B.yaml b/examples/configs/grpo_rm_1B.yaml
index b0a709b253..61e6204b9a 100644
--- a/examples/configs/grpo_rm_1B.yaml
+++ b/examples/configs/grpo_rm_1B.yaml
@@ -2,7 +2,8 @@
 defaults: "grpo_math_1B.yaml"
 
 data:
-  env_name: "reward_model"
+  default:
+    env_name: "reward_model"
 
 env:
   reward_model:  
diff --git a/examples/configs/grpo_sliding_puzzle.yaml b/examples/configs/grpo_sliding_puzzle.yaml
index 54e03ae524..edfc1096d1 100644
--- a/examples/configs/grpo_sliding_puzzle.yaml
+++ b/examples/configs/grpo_sliding_puzzle.yaml
@@ -77,4 +77,4 @@ logger:
     run_name: "grpo-dev-sliding_puzzle"
   gpu_monitoring:
     collection_interval: 10  # How often to collect GPU usage metrics (in seconds)
-    flush_interval: 10  # How often to flush GPU usage metrics to the loggers (in seconds)
\ No newline at end of file
+    flush_interval: 10  # How often to flush GPU usage metrics to the loggers (in seconds)
diff --git a/examples/configs/recipes/llm/dapo-qwen2.5-7b-16n4g-fsdp2cp2.yaml b/examples/configs/recipes/llm/dapo-qwen2.5-7b-16n4g-fsdp2cp2.yaml
new file mode 100644
index 0000000000..6135f42da7
--- /dev/null
+++ b/examples/configs/recipes/llm/dapo-qwen2.5-7b-16n4g-fsdp2cp2.yaml
@@ -0,0 +1,12 @@
+defaults: ./dapo-qwen2.5-7b.yaml
+policy:
+  dtensor_cfg:
+    context_parallel_size: 2
+checkpointing:
+  checkpoint_dir: results/dapo-qwen2.5-7b-16n4g-fsdp2cp2
+logger:
+  log_dir: logs/dapo-qwen2.5-7b-16n4g-fsdp2cp2
+  wandb:
+    name: dapo-qwen2.5-7b-16n4g-fsdp2cp2
+cluster:
+  gpus_per_node: 4
diff --git a/examples/configs/recipes/llm/dapo-qwen2.5-7b.yaml b/examples/configs/recipes/llm/dapo-qwen2.5-7b.yaml
index 1fce7d82d4..9035a3598c 100644
--- a/examples/configs/recipes/llm/dapo-qwen2.5-7b.yaml
+++ b/examples/configs/recipes/llm/dapo-qwen2.5-7b.yaml
@@ -1,22 +1,16 @@
 defaults: ../../grpo_math_1B.yaml
 grpo:
   num_prompts_per_step: 512
-  num_generations_per_prompt: 16
   batch_multiplier: 3 # Multiplier for dataloader batch size calculation (batch_multiplier × num_prompts_per_step). Following DAPO dynamic sampling, the actual training batch size equals num_prompts_per_step × num_generations_per_prompt.
-  max_rollout_turns: 1 # for multi-turn rollouts. Math Environments just have 1 turn (answering the question)
   max_num_steps: 10000
   use_leave_one_out_baseline: false
   val_period: 20
   max_val_samples: 960
   val_batch_size: 960
   use_dynamic_sampling: true
-  dynamic_sampling_max_gen_batches: 10
   reward_scaling:
     enabled: true
-    source_min: 0.0
-    source_max: 1.0
     target_min: -1.0
-    target_max: 1.0
   reward_shaping:
     enabled: true
     overlong_buffer_length: 2048
@@ -41,7 +35,6 @@ policy:
     _v2: false
     context_parallel_size: 4
   megatron_cfg:
-    empty_unused_memory_level: 1
     tensor_model_parallel_size: 4
     pipeline_model_parallel_size: 2
     context_parallel_size: 2
@@ -82,13 +75,16 @@ policy:
       enforce_eager: true
 data:
   max_input_seq_length: 2048
-  prompt_file: null
-  dataset_name: DAPOMath17K
+  train:
+    dataset_name: DAPOMath17K
+  validation:
+    dataset_name: DAPOMathAIME2024
+  default:
+    prompt_file: null
 env:
   math:
     num_workers: 16
-    math_verify_impl: "dapo_math_verify"
-
+    math_verify_impl: dapo_math_verify
 logger:
   monitor_gpus: false
   wandb:
diff --git a/examples/configs/recipes/llm/distillation-qwen3-32b-to-1.7b-base-1n4g-fsdp2tp1.v1.yaml b/examples/configs/recipes/llm/distillation-qwen3-32b-to-1.7b-base-1n4g-fsdp2tp1.v1.yaml
new file mode 100644
index 0000000000..c0033a858a
--- /dev/null
+++ b/examples/configs/recipes/llm/distillation-qwen3-32b-to-1.7b-base-1n4g-fsdp2tp1.v1.yaml
@@ -0,0 +1,9 @@
+defaults: ./distillation-qwen3-32b-to-1.7b-base-1n8g-fsdp2tp1.v1.yaml
+checkpointing:
+  checkpoint_dir: checkpoints/distillation-qwen3-32b-to-1.7b-base-1n4g-fsdp2tp1
+logger:
+  log_dir: logs/distillation-qwen3-32b-to-1.7b-base-1n4g-fsdp2tp1
+  wandb:
+    name: distillation-qwen3-32b-to-1.7b-base-1n4g-fsdp2tp1
+cluster:
+  gpus_per_node: 4
diff --git a/examples/configs/recipes/llm/distillation-qwen3-32b-to-1.7b-base-1n4g-megatron-tp1pp2cp2-pack.yaml b/examples/configs/recipes/llm/distillation-qwen3-32b-to-1.7b-base-1n4g-megatron-tp1pp2cp2-pack.yaml
new file mode 100644
index 0000000000..95c9e85573
--- /dev/null
+++ b/examples/configs/recipes/llm/distillation-qwen3-32b-to-1.7b-base-1n4g-megatron-tp1pp2cp2-pack.yaml
@@ -0,0 +1,16 @@
+defaults: ./distillation-qwen3-32b-to-1.7b-base-1n8g-megatron-tp2pp2cp2-pack.yaml
+policy:
+  megatron_cfg:
+    tensor_model_parallel_size: 1
+teacher:
+  megatron_cfg:
+    tensor_model_parallel_size: 2
+checkpointing:
+  checkpoint_dir: checkpoints/distillation-qwen3-32b-to-1.7b-base-1n4g-megatron-tp1pp2cp2-pack
+logger:
+  log_dir: logs/distillation-qwen3-32b-to-1.7b-base-1n4g-megatron-tp1pp2cp2-pack
+  wandb:
+    name: distillation-qwen3-32b-to-1.7b-base-1n4g-megatron-tp1pp2cp2-pack
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-2n4g-fsdp2tp1-long.v1.yaml b/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-2n4g-fsdp2tp1-long.v1.yaml
new file mode 100644
index 0000000000..07b0b05576
--- /dev/null
+++ b/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-2n4g-fsdp2tp1-long.v1.yaml
@@ -0,0 +1,14 @@
+defaults: ./distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-long.v1.yaml
+policy:
+  generation:
+    vllm_cfg:
+      tensor_parallel_size: 1
+checkpointing:
+  checkpoint_dir: checkpoints/distillation-qwen3-32b-to-4b-base-2n4g-fsdp2tp1-long
+logger:
+  log_dir: logs/distillation-qwen3-32b-to-4b-base-2n4g-fsdp2tp1-long
+  wandb:
+    name: distillation-qwen3-32b-to-4b-base-2n4g-fsdp2tp1-long
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n4g-fsdp2tp1-quick.v2.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n4g-fsdp2tp1-quick.v2.yaml
new file mode 100644
index 0000000000..3d4db06f48
--- /dev/null
+++ b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n4g-fsdp2tp1-quick.v2.yaml
@@ -0,0 +1,10 @@
+defaults: ./dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml
+policy:
+  dtensor_cfg:
+    tensor_parallel_size: 1
+logger:
+  wandb:
+    name: dpo-llama3.1-8b-instruct-4n4g-fsdp2tp1-quick
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n4g-megatrontp1pp2-quick.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n4g-megatrontp1pp2-quick.yaml
new file mode 100644
index 0000000000..8324173dfc
--- /dev/null
+++ b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n4g-megatrontp1pp2-quick.yaml
@@ -0,0 +1,11 @@
+defaults: ./dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.yaml
+policy:
+  megatron_cfg:
+    tensor_model_parallel_size: 1
+    sequence_parallel: false
+logger:
+  wandb:
+    name: dpo-llama3.1-8b-instruct-4n4g-megatrontp1pp2-quick
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/dpo-llama3.2-1b-instruct-1n4g-fsdp2tp1.v2.yaml b/examples/configs/recipes/llm/dpo-llama3.2-1b-instruct-1n4g-fsdp2tp1.v2.yaml
new file mode 100644
index 0000000000..851e6fffc4
--- /dev/null
+++ b/examples/configs/recipes/llm/dpo-llama3.2-1b-instruct-1n4g-fsdp2tp1.v2.yaml
@@ -0,0 +1,10 @@
+defaults: ./dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml
+checkpointing:
+  checkpoint_dir: results/dpo-llama3.2-1b-instruct-1n4g-fsdp2tp1
+logger:
+  log_dir: logs/dpo-llama3.2-1b-instruct-1n4g-fsdp2tp1
+  wandb:
+    name: dpo-llama3.2-1b-instruct-1n4g-fsdp2tp1
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/grpo-dapomath17k-dsv3-32n4g-megatron.yaml b/examples/configs/recipes/llm/grpo-dapomath17k-dsv3-32n4g-megatron.yaml
new file mode 100644
index 0000000000..fb4a4bc880
--- /dev/null
+++ b/examples/configs/recipes/llm/grpo-dapomath17k-dsv3-32n4g-megatron.yaml
@@ -0,0 +1,23 @@
+defaults: ./grpo-dapomath17k-dsv3-megatron.yaml
+policy:
+  megatron_cfg:
+    tensor_model_parallel_size: 4
+    expert_model_parallel_size: 16
+    pipeline_model_parallel_size: 4
+    context_parallel_size: 2
+    num_layers_in_first_pipeline_stage: 15
+    num_layers_in_last_pipeline_stage: 14
+  make_sequence_length_divisible_by: 4
+  generation:
+    vllm_cfg:
+      tensor_parallel_size: 16
+checkpointing:
+  checkpoint_dir: results/grpo-dapomath17k-dsv3-32n4g-megatron
+logger:
+  wandb:
+    name: grpo-dapomath17k-dsv3-32n4g-megatron
+  mlflow:
+    run_name: grpo-dapomath17k-dsv3-32n4g-megatron
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/grpo-dapomath17k-dsv3-megatron.yaml b/examples/configs/recipes/llm/grpo-dapomath17k-dsv3-megatron.yaml
index 6e00ecd37c..8d19757d54 100644
--- a/examples/configs/recipes/llm/grpo-dapomath17k-dsv3-megatron.yaml
+++ b/examples/configs/recipes/llm/grpo-dapomath17k-dsv3-megatron.yaml
@@ -29,6 +29,8 @@ policy:
     sequence_parallel: true
     moe_permute_fusion: true
     apply_rope_fusion: false
+    moe_enable_deepep: true
+    moe_token_dispatcher_type: flex
     optimizer:
       lr: 5.0e-07
       min_lr: 5.0e-08
@@ -39,8 +41,12 @@ policy:
       async_engine: true
       tensor_parallel_size: 32
 data:
-  prompt_file: null
-  dataset_name: DAPOMath17K
+  train:
+    dataset_name: DAPOMath17K
+  validation:
+    dataset_name: DAPOMathAIME2024
+  default:
+    prompt_file: null
 logger:
   monitor_gpus: false
   wandb:
diff --git a/examples/configs/recipes/llm/grpo-deepscaler-1.5b-16K.yaml b/examples/configs/recipes/llm/grpo-deepscaler-1.5b-16K.yaml
index ccfa867209..d52cd0fd27 100644
--- a/examples/configs/recipes/llm/grpo-deepscaler-1.5b-16K.yaml
+++ b/examples/configs/recipes/llm/grpo-deepscaler-1.5b-16K.yaml
@@ -1,6 +1,4 @@
-defaults:
-  - ../../grpo_math_1B.yaml
-  - grpo-deepscaler-1.5b-8K.yaml
+defaults: grpo-deepscaler-1.5b-8K.yaml
 loss_fn:
   reference_policy_kl_penalty: 0.001
   ratio_clip_max: 0.28
@@ -8,7 +6,4 @@ policy:
   max_total_sequence_length: 16384
   logprob_batch_size: 2
   dtensor_cfg:
-    cpu_offload: true
-    activation_checkpointing: true
     tensor_parallel_size: 2
-    _v2: false
diff --git a/examples/configs/recipes/llm/grpo-deepscaler-1.5b-1n4g-8K.yaml b/examples/configs/recipes/llm/grpo-deepscaler-1.5b-1n4g-8K.yaml
new file mode 100644
index 0000000000..61f41f2220
--- /dev/null
+++ b/examples/configs/recipes/llm/grpo-deepscaler-1.5b-1n4g-8K.yaml
@@ -0,0 +1,10 @@
+defaults: ./grpo-deepscaler-1.5b-8K.yaml
+checkpointing:
+  checkpoint_dir: results/grpo-deepscaler-1.5b-1n4g-8K
+logger:
+  log_dir: logs/grpo-deepscaler-1.5b-1n4g-8K
+  wandb:
+    name: grpo-deepscaler-1.5b-1n4g-8K
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/grpo-deepscaler-1.5b-24K.yaml b/examples/configs/recipes/llm/grpo-deepscaler-1.5b-24K.yaml
index ccdb67197b..82c3c68b63 100644
--- a/examples/configs/recipes/llm/grpo-deepscaler-1.5b-24K.yaml
+++ b/examples/configs/recipes/llm/grpo-deepscaler-1.5b-24K.yaml
@@ -1,6 +1,4 @@
-defaults:
-  - ../../grpo_math_1B.yaml
-  - grpo-deepscaler-1.5b-8K.yaml
+defaults: grpo-deepscaler-1.5b-8K.yaml
 loss_fn:
   reference_policy_kl_penalty: 0.0001
   ratio_clip_max: 0.28
@@ -8,12 +6,7 @@ policy:
   max_total_sequence_length: 24576
   logprob_batch_size: 2
   dtensor_cfg:
-    cpu_offload: true
-    activation_checkpointing: true
     tensor_parallel_size: 2
-    _v2: false
-  sequence_packing:
-    enabled: false
   optimizer:
     kwargs:
       lr: 5.0e-07
diff --git a/examples/configs/recipes/llm/grpo-deepscaler-1.5b-8K.yaml b/examples/configs/recipes/llm/grpo-deepscaler-1.5b-8K.yaml
index 584b807663..ca29b07aac 100644
--- a/examples/configs/recipes/llm/grpo-deepscaler-1.5b-8K.yaml
+++ b/examples/configs/recipes/llm/grpo-deepscaler-1.5b-8K.yaml
@@ -28,7 +28,11 @@ policy:
       compilation_config:
         use_inductor: false
 data:
-  dataset_name: DeepScaler
+  train:
+    dataset_name: DeepScaler
+  validation:
+    dataset_name: AIME2024
+    repeat: 16
 env:
   math:
     num_workers: 16
diff --git a/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n4g-fsdp2tp1.yaml b/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n4g-fsdp2tp1.yaml
new file mode 100644
index 0000000000..16a451cec3
--- /dev/null
+++ b/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n4g-fsdp2tp1.yaml
@@ -0,0 +1,10 @@
+defaults: ./grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml
+checkpointing:
+  checkpoint_dir: results/grpo-gemma3-1b-it-1n4g-fsdp2tp1
+logger:
+  log_dir: logs/grpo-gemma3-1b-it-1n4g-fsdp2tp1
+  wandb:
+    name: grpo-gemma3-1b-it-1n4g-fsdp2tp1
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/grpo-gemma3-27b-it-8n4g-fsdp2tp4-actckpt-long.yaml b/examples/configs/recipes/llm/grpo-gemma3-27b-it-8n4g-fsdp2tp4-actckpt-long.yaml
new file mode 100644
index 0000000000..750b3ca612
--- /dev/null
+++ b/examples/configs/recipes/llm/grpo-gemma3-27b-it-8n4g-fsdp2tp4-actckpt-long.yaml
@@ -0,0 +1,17 @@
+defaults: ./grpo-gemma3-27b-it-8n8g-fsdp2tp8-actckpt-long.yaml
+policy:
+  dtensor_cfg:
+    tensor_parallel_size: 4
+  make_sequence_length_divisible_by: 4
+  generation:
+    vllm_cfg:
+      tensor_parallel_size: 2
+checkpointing:
+  checkpoint_dir: results/grpo-gemma3-27b-it-8n4g-fsdp2tp4-actckpt-long
+logger:
+  log_dir: logs/grpo-gemma3-27b-it-8n4g-fsdp2tp4-actckpt-long
+  wandb:
+    name: grpo-gemma3-27b-it-8n4g-fsdp2tp4-actckpt-long
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/grpo-gptoss-20b-8n4g-megatron.yaml b/examples/configs/recipes/llm/grpo-gptoss-20b-8n4g-megatron.yaml
new file mode 100644
index 0000000000..c9719f381f
--- /dev/null
+++ b/examples/configs/recipes/llm/grpo-gptoss-20b-8n4g-megatron.yaml
@@ -0,0 +1,11 @@
+defaults: ./grpo-gptoss-20b-8n8g-megatron.yaml
+policy:
+  megatron_cfg:
+    expert_model_parallel_size: 4
+    tensor_model_parallel_size: 2
+  generation:
+    vllm_cfg:
+      tensor_parallel_size: 1
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/grpo-gspo-deepscaler-1.5b-8K.yaml b/examples/configs/recipes/llm/grpo-gspo-deepscaler-1.5b-8K.yaml
index d5525fc027..e98d7d4680 100644
--- a/examples/configs/recipes/llm/grpo-gspo-deepscaler-1.5b-8K.yaml
+++ b/examples/configs/recipes/llm/grpo-gspo-deepscaler-1.5b-8K.yaml
@@ -30,7 +30,11 @@ policy:
     vllm_cfg:
       enforce_eager: true
 data:
-  dataset_name: DeepScaler
+  train:
+    dataset_name: DeepScaler
+  validation:
+    dataset_name: AIME2024
+    repeat: 16
 env:
   math:
     num_workers: 16
diff --git a/examples/configs/recipes/llm/grpo-helpsteer3-llama-3.3-nemotron-super-49b-v1.5-8n8g-fsdp2tp8cp4.yaml.disabled b/examples/configs/recipes/llm/grpo-helpsteer3-llama-3.3-nemotron-super-49b-v1.5-8n8g-fsdp2tp8cp4.yaml.disabled
index b1f65495fa..f442856807 100644
--- a/examples/configs/recipes/llm/grpo-helpsteer3-llama-3.3-nemotron-super-49b-v1.5-8n8g-fsdp2tp8cp4.yaml.disabled
+++ b/examples/configs/recipes/llm/grpo-helpsteer3-llama-3.3-nemotron-super-49b-v1.5-8n8g-fsdp2tp8cp4.yaml.disabled
@@ -44,11 +44,16 @@ policy:
 data:
   # Training with HelpSteer3 will lead to high logprob error.
   # ISSUE: https://github.com/NVIDIA-NeMo/RL/issues/1570
-  prompt_file: null
-  dataset_name: HelpSteer3
-  split: preference
-  env_name: "code_jaccard"
-  processor: helpsteer3_data_processor
+  train:
+    dataset_name: HelpSteer3
+    split: train
+  validation:
+    dataset_name: HelpSteer3
+    split: validation
+  default:
+    prompt_file: null
+    env_name: "code_jaccard"
+    processor: helpsteer3_data_processor
 env:
   code_jaccard:
     num_workers: 8
diff --git a/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-2n4g-fsdp2tp1-noncolocated.yaml b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-2n4g-fsdp2tp1-noncolocated.yaml
new file mode 100644
index 0000000000..7aea28ad08
--- /dev/null
+++ b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-2n4g-fsdp2tp1-noncolocated.yaml
@@ -0,0 +1,14 @@
+defaults: ./grpo-llama3.1-8b-instruct-2n8g-fsdp2tp1-noncolocated.yaml
+checkpointing:
+  checkpoint_dir: results/grpo-llama3.1-8b-instruct-2n4g-fsdp2tp1-noncolocated
+logger:
+  log_dir: logs/grpo-llama3.1-8b-instruct-2n4g-fsdp2tp1-noncolocated
+  wandb:
+    name: grpo-llama3.1-8b-instruct-2n4g-fsdp2tp1-noncolocated
+cluster:
+  gpus_per_node: 4
+policy:
+  generation:
+    colocated:
+      resources:
+        gpus_per_node: 4
diff --git a/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n4g-fsdp2tp1-long.v3.yaml b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n4g-fsdp2tp1-long.v3.yaml
new file mode 100644
index 0000000000..75c43601bf
--- /dev/null
+++ b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n4g-fsdp2tp1-long.v3.yaml
@@ -0,0 +1,10 @@
+defaults: ./grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml
+checkpointing:
+  checkpoint_dir: results/grpo-llama3.1-8b-instruct-4n4g-fsdp2tp1-long
+logger:
+  log_dir: logs/grpo-llama3.1-8b-instruct-4n4g-fsdp2tp1-long
+  wandb:
+    name: grpo-llama3.1-8b-instruct-4n4g-fsdp2tp1-long
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n4g-fsdp2tp1.v3.yaml b/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n4g-fsdp2tp1.v3.yaml
new file mode 100644
index 0000000000..9d04e2c89c
--- /dev/null
+++ b/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n4g-fsdp2tp1.v3.yaml
@@ -0,0 +1,10 @@
+defaults: ./grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml
+checkpointing:
+  checkpoint_dir: results/grpo-llama3.2-1b-instruct-1n4g-fsdp2tp1
+logger:
+  log_dir: logs/grpo-llama3.2-1b-instruct-1n4g-fsdp2tp1
+  wandb:
+    name: grpo-llama3.2-1b-instruct-1n4g-fsdp2tp1
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n4g-megatron.yaml b/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n4g-megatron.yaml
new file mode 100644
index 0000000000..43608a9402
--- /dev/null
+++ b/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n4g-megatron.yaml
@@ -0,0 +1,10 @@
+defaults: ./grpo-llama3.2-1b-instruct-1n8g-megatron.yaml
+checkpointing:
+  checkpoint_dir: results/grpo-llama3.2-1b-instruct-1n4g-megatron
+logger:
+  log_dir: logs/grpo-llama3.2-1b-instruct-1n4g-megatron
+  wandb:
+    name: grpo-llama3.2-1b-instruct-1n4g-megatron
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n4g-megatron_generation.yaml b/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n4g-megatron_generation.yaml
new file mode 100644
index 0000000000..46c1a31fb5
--- /dev/null
+++ b/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n4g-megatron_generation.yaml
@@ -0,0 +1,10 @@
+defaults: ./grpo-llama3.2-1b-instruct-1n8g-megatron_generation.yaml
+checkpointing:
+  checkpoint_dir: results/grpo-llama3.2-1b-instruct-1n4g-megatron_generation
+logger:
+  log_dir: logs/grpo-llama3.2-1b-instruct-1n4g-megatron_generation
+  wandb:
+    name: grpo-llama3.2-1b-instruct-1n4g-megatron_generation
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/grpo-moonlight-16ba3b-4n4g-megatron.yaml b/examples/configs/recipes/llm/grpo-moonlight-16ba3b-4n4g-megatron.yaml
new file mode 100644
index 0000000000..97d6ffede7
--- /dev/null
+++ b/examples/configs/recipes/llm/grpo-moonlight-16ba3b-4n4g-megatron.yaml
@@ -0,0 +1,15 @@
+defaults: ./grpo-moonlight-16ba3b-4n8g-megatron.yaml
+checkpointing:
+  checkpoint_dir: results/grpo-moonlight-16ba3b-4n4g-megatron
+policy:
+  megatron_cfg:
+    expert_model_parallel_size: 2
+    pipeline_model_parallel_size: 2
+    num_layers_in_first_pipeline_stage: 14
+    num_layers_in_last_pipeline_stage: 13
+logger:
+  wandb:
+    name: grpo-moonlight-16ba3b-4n4g-megatron
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/grpo-moonlight-16ba3b-4n8g-megatron.yaml b/examples/configs/recipes/llm/grpo-moonlight-16ba3b-4n8g-megatron.yaml
index e1e38fbbfc..83ea6128ef 100644
--- a/examples/configs/recipes/llm/grpo-moonlight-16ba3b-4n8g-megatron.yaml
+++ b/examples/configs/recipes/llm/grpo-moonlight-16ba3b-4n8g-megatron.yaml
@@ -31,6 +31,8 @@ policy:
       lr: 1.0e-06
     scheduler:
       lr_warmup_iters: 50
+    moe_enable_deepep: true
+    moe_token_dispatcher_type: flex
 logger:
   monitor_gpus: false
   wandb:
diff --git a/examples/configs/recipes/llm/grpo-nano-v2-12b-1n4g-megatron.yaml b/examples/configs/recipes/llm/grpo-nano-v2-12b-1n4g-megatron.yaml
new file mode 100644
index 0000000000..da8301a19b
--- /dev/null
+++ b/examples/configs/recipes/llm/grpo-nano-v2-12b-1n4g-megatron.yaml
@@ -0,0 +1,13 @@
+defaults: ./grpo-nano-v2-12b-1n8g-megatron.yaml
+checkpointing:
+  checkpoint_dir: results/grpo-nano-v2-12b-1n4g-megatron
+policy:
+  megatron_cfg:
+    tensor_model_parallel_size: 4
+logger:
+  log_dir: logs/grpo-nano-v2-12b-1n4g-megatron
+  wandb:
+    name: grpo-nano-v2-12b-1n4g-megatron
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/grpo-nano-v2-12b-2n4g-fsdp2tp1.yaml b/examples/configs/recipes/llm/grpo-nano-v2-12b-2n4g-fsdp2tp1.yaml
new file mode 100644
index 0000000000..82164d8bf4
--- /dev/null
+++ b/examples/configs/recipes/llm/grpo-nano-v2-12b-2n4g-fsdp2tp1.yaml
@@ -0,0 +1,10 @@
+defaults: ./grpo-nano-v2-12b-2n8g-fsdp2tp1.yaml
+checkpointing:
+  checkpoint_dir: results/grpo-nano-v2-12b-2n4g-fsdp2tp1
+logger:
+  log_dir: logs/grpo-nano-v2-12b-2n4g-fsdp2tp1
+  wandb:
+    name: grpo-nano-v2-12b-2n4g-fsdp2tp1
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n4g-fsdp2tp4-actckpt-long.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n4g-fsdp2tp4-actckpt-long.v3.yaml
new file mode 100644
index 0000000000..de3fa543f6
--- /dev/null
+++ b/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n4g-fsdp2tp4-actckpt-long.v3.yaml
@@ -0,0 +1,17 @@
+defaults: ./grpo-qwen2.5-32b-32n8g-fsdp2tp8-actckpt-long.v3.yaml
+policy:
+  dtensor_cfg:
+    tensor_parallel_size: 4
+  make_sequence_length_divisible_by: 4
+  generation:
+    vllm_cfg:
+      tensor_parallel_size: 2
+checkpointing:
+  checkpoint_dir: results/grpo-qwen2.5-32b-32n4g-fsdp2tp4-actckpt-long
+logger:
+  log_dir: logs/grpo-qwen2.5-32b-32n4g-fsdp2tp4-actckpt-long
+  wandb:
+    name: grpo-qwen2.5-32b-32n4g-fsdp2tp4-actckpt-long
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n4g-fsdp2tp2.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n4g-fsdp2tp2.v3.yaml
new file mode 100644
index 0000000000..f364d2ffa0
--- /dev/null
+++ b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n4g-fsdp2tp2.v3.yaml
@@ -0,0 +1,17 @@
+defaults: ./grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4.v3.yaml
+checkpointing:
+  checkpoint_dir: results/grpo-qwen2.5-7b-instruct-4n4g-fsdp2tp2
+policy:
+  dtensor_cfg:
+    tensor_parallel_size: 2
+  make_sequence_length_divisible_by: 2
+  generation:
+    vllm_cfg:
+      tensor_parallel_size: 2
+logger:
+  log_dir: logs/grpo-qwen2.5-7b-instruct-4n4g-fsdp2tp2
+  wandb:
+    name: grpo-qwen2.5-7b-instruct-4n4g-fsdp2tp2
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n4g-megatron.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n4g-megatron.yaml
new file mode 100644
index 0000000000..b21c9dd51f
--- /dev/null
+++ b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n4g-megatron.yaml
@@ -0,0 +1,17 @@
+defaults: ./grpo-qwen2.5-7b-instruct-4n8g-megatron.yaml
+policy:
+  megatron_cfg:
+    tensor_model_parallel_size: 1
+  make_sequence_length_divisible_by: 2
+  generation:
+    vllm_cfg:
+      tensor_parallel_size: 2
+checkpointing:
+  checkpoint_dir: results/grpo-qwen2.5-7b-instruct-4n4g-megatron
+logger:
+  log_dir: logs/grpo-qwen2.5-7b-instruct-4n4g-megatron
+  wandb:
+    name: grpo-qwen2.5-7b-instruct-4n4g-megatron
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n4g-fsdp2tp1.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n4g-fsdp2tp1.v3.yaml
new file mode 100644
index 0000000000..a5ede32956
--- /dev/null
+++ b/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n4g-fsdp2tp1.v3.yaml
@@ -0,0 +1,10 @@
+defaults: ./grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml
+checkpointing:
+  checkpoint_dir: results/grpo-qwen2.5-math-1.5b-instruct-1n4g-fsdp2tp1
+logger:
+  log_dir: logs/grpo-qwen2.5-math-1.5b-instruct-1n4g-fsdp2tp1
+  wandb:
+    name: grpo-qwen2.5-math-1.5b-instruct-1n4g-fsdp2tp1
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1-sglang.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1-sglang.yaml
new file mode 100644
index 0000000000..6c377cb8a0
--- /dev/null
+++ b/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1-sglang.yaml
@@ -0,0 +1,48 @@
+defaults: ../../grpo_math_1B.yaml
+
+grpo:
+  max_num_steps: 450
+
+checkpointing:
+  checkpoint_dir: results/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1-sglang
+
+policy:
+  model_name: Qwen/Qwen2.5-Math-1.5B-Instruct
+  tokenizer:
+    name: Qwen/Qwen2.5-Math-1.5B-Instruct
+  dynamic_batching:
+    enabled: true
+  sequence_packing:
+    enabled: false
+  make_sequence_length_divisible_by: 1
+  generation:
+    backend: "sglang"
+    max_new_tokens: 512
+    sglang_cfg:
+      model_path: ${policy.model_name}
+      gpus_per_server: 1
+      dtype: ${policy.precision}
+      context_length: 512
+      allow_auto_truncate: true
+      enable_memory_saver: false
+      dp_size: 1
+      pp_size: 1
+      ep_size: 1
+      max_running_requests: null
+      mem_fraction_static: 0.5
+      skip_server_warmup: true
+
+data:
+  max_input_seq_length: 512
+
+logger:
+  log_dir: logs/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1-sglang
+  wandb_enabled: true
+  tensorboard_enabled: true
+  wandb:
+    project: nemo-rl
+    name: grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1-sglang
+
+cluster:
+  gpus_per_node: 8
+
diff --git a/examples/configs/recipes/llm/grpo-qwen3-0.6b-1n8g-sglang.yaml b/examples/configs/recipes/llm/grpo-qwen3-0.6b-1n8g-sglang.yaml
new file mode 100644
index 0000000000..30c6f5f76c
--- /dev/null
+++ b/examples/configs/recipes/llm/grpo-qwen3-0.6b-1n8g-sglang.yaml
@@ -0,0 +1,49 @@
+defaults: ../../grpo_math_1B.yaml
+
+grpo:
+  max_num_steps: 500
+  val_batch_size: 128
+
+checkpointing:
+  checkpoint_dir: results/grpo-qwen3-0.6b-1n8g-sglang
+
+policy:
+  model_name: Qwen/Qwen3-0.6B
+  tokenizer:
+    name: Qwen/Qwen3-0.6B
+  dynamic_batching:
+    enabled: true
+  sequence_packing:
+    enabled: false
+  make_sequence_length_divisible_by: 1
+  generation:
+    backend: "sglang"
+    max_new_tokens: 512
+    sglang_cfg:
+      model_path: ${policy.model_name}
+      gpus_per_server: 8
+      dtype: ${policy.precision}
+      context_length: 512
+      allow_auto_truncate: true
+      enable_memory_saver: false
+      dp_size: 1
+      pp_size: 1
+      ep_size: 1
+      max_running_requests: null
+      mem_fraction_static: 0.7
+      skip_server_warmup: true
+
+data:
+  max_input_seq_length: 512
+
+logger:
+  log_dir: logs/grpo-qwen3-0.6b-1n8g-sglang
+  wandb_enabled: true
+  tensorboard_enabled: true
+  wandb:
+    project: nemo-rl
+    name: grpo-qwen3-0.6b-1n8g-sglang
+
+cluster:
+  gpus_per_node: 8
+
diff --git a/examples/configs/recipes/llm/grpo-qwen3-30ba3b-8n4g-megatron.yaml b/examples/configs/recipes/llm/grpo-qwen3-30ba3b-8n4g-megatron.yaml
new file mode 100644
index 0000000000..79fbda389d
--- /dev/null
+++ b/examples/configs/recipes/llm/grpo-qwen3-30ba3b-8n4g-megatron.yaml
@@ -0,0 +1,19 @@
+defaults: ./grpo-qwen3-30ba3b-8n8g-megatron.yaml
+policy:
+  megatron_cfg:
+    tensor_model_parallel_size: 2
+    pipeline_model_parallel_size: 2
+    expert_model_parallel_size: 2
+  make_sequence_length_divisible_by: 2
+  generation:
+    vllm_cfg:
+      tensor_parallel_size: 2
+checkpointing:
+  checkpoint_dir: results/grpo-qwen3-30ba3b-8n4g-megatron
+logger:
+  log_dir: logs/grpo-qwen3-30ba3b-8n4g-megatron
+  wandb:
+    name: grpo-qwen3-30ba3b-8n4g-megatron
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/grpo-qwen3-8b-base-1n8g-fp8-kvcache-megatron.yaml b/examples/configs/recipes/llm/grpo-qwen3-8b-base-1n8g-fp8-kvcache-megatron.yaml
index 78b4597c2c..69ff4a4229 100644
--- a/examples/configs/recipes/llm/grpo-qwen3-8b-base-1n8g-fp8-kvcache-megatron.yaml
+++ b/examples/configs/recipes/llm/grpo-qwen3-8b-base-1n8g-fp8-kvcache-megatron.yaml
@@ -37,8 +37,12 @@ policy:
       use_deep_gemm: true
 data:
   max_input_seq_length: 2048
-  prompt_file: null
-  dataset_name: DAPOMath17K
+  train:
+    dataset_name: DAPOMath17K
+  validation:
+    dataset_name: DAPOMathAIME2024
+  default:
+    prompt_file: null
 env:
   dapo:
     num_workers: 16
diff --git a/examples/configs/recipes/llm/performance/.grpo-deepseek-v3-32n4g.yaml.swp b/examples/configs/recipes/llm/performance/.grpo-deepseek-v3-32n4g.yaml.swp
new file mode 100644
index 0000000000..287b7b0973
Binary files /dev/null and b/examples/configs/recipes/llm/performance/.grpo-deepseek-v3-32n4g.yaml.swp differ
diff --git a/examples/configs/recipes/llm/performance/.grpo-deepseek-v3-32n8g.yaml.swp b/examples/configs/recipes/llm/performance/.grpo-deepseek-v3-32n8g.yaml.swp
new file mode 100644
index 0000000000..98e5b39f68
Binary files /dev/null and b/examples/configs/recipes/llm/performance/.grpo-deepseek-v3-32n8g.yaml.swp differ
diff --git a/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n8g-40K.yaml b/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n8g-40K.yaml
new file mode 100644
index 0000000000..2270d5e272
--- /dev/null
+++ b/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n8g-40K.yaml
@@ -0,0 +1,46 @@
+defaults: ../../../grpo_math_1B.yaml
+grpo:
+  num_prompts_per_step: 64
+  num_generations_per_prompt: 32
+checkpointing:
+  enabled: false
+  checkpoint_dir: results/grpo-qwen3-30ba3b-4n8g-40K
+policy:
+  model_name: Qwen/Qwen3-30B-A3B
+  train_micro_batch_size: 1
+  max_total_sequence_length: 40960
+  dtensor_cfg:
+    enabled: false
+  optimizer: null
+  scheduler: null
+  make_sequence_length_divisible_by: ${policy.megatron_cfg.tensor_model_parallel_size}
+  megatron_cfg:
+    enabled: true
+    empty_unused_memory_level: 1
+    tensor_model_parallel_size: 4
+    pipeline_model_parallel_size: 1
+    expert_model_parallel_size: 8
+    sequence_parallel: true
+    context_parallel_size: 8
+    optimizer:
+      lr: 3.0e-07
+      min_lr: 3.0e-08
+    scheduler:
+      lr_warmup_iters: 50
+      lr_warmup_init: 3.0e-08
+    env_vars:
+      PYTORCH_CUDA_ALLOC_CONF: expandable_segments:False
+    activation_checkpointing: true
+  generation:
+    vllm_cfg:
+      tensor_parallel_size: 2
+logger:
+  log_dir: logs/grpo-qwen3-30ba3b-4n8g-40K
+  wandb_enabled: true
+  tensorboard_enabled: true
+  wandb:
+    project: nemo-rl
+    name: grpo-qwen3-30ba3b-4n8g-40K
+cluster:
+  gpus_per_node: 8
+  num_nodes: 4
diff --git a/examples/configs/recipes/llm/sft-gpt-oss-20b-1n4g-fsdp4ep4-automodel.yaml b/examples/configs/recipes/llm/sft-gpt-oss-20b-1n4g-fsdp4ep4-automodel.yaml
new file mode 100644
index 0000000000..f6b01fc444
--- /dev/null
+++ b/examples/configs/recipes/llm/sft-gpt-oss-20b-1n4g-fsdp4ep4-automodel.yaml
@@ -0,0 +1,9 @@
+defaults: ./sft-gpt-oss-20b-1n8g-fsdp8ep8-automodel.yaml
+policy:
+  dtensor_cfg:
+    expert_parallel_size: 4
+checkpointing:
+  checkpoint_dir: results/sft-gpt-oss-20b-1n4g-fsdp4ep4-automodel
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/sft-llama3.1-70b-8n4g-tp2pp2-long-megatron.yaml b/examples/configs/recipes/llm/sft-llama3.1-70b-8n4g-tp2pp2-long-megatron.yaml
new file mode 100644
index 0000000000..77c175fadf
--- /dev/null
+++ b/examples/configs/recipes/llm/sft-llama3.1-70b-8n4g-tp2pp2-long-megatron.yaml
@@ -0,0 +1,15 @@
+defaults: ./sft-llama3.1-70b-8n8g-tp4pp2-long-megatron.yaml
+policy:
+  megatron_cfg:
+    tensor_model_parallel_size: 2
+  make_sequence_length_divisible_by: 2
+checkpointing:
+  checkpoint_dir: results/sft-llama3.1-70b-8n4g-tp2pp2-long-megatron
+logger:
+  wandb:
+    name: sft-llama3.1-70b-8n4g-tp2pp2-long-megatron
+  tensorboard:
+    log_dir: tb_logs-sft-llama3.1-70b-8n4g-tp2pp2-long-megatron
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/sft-llama3.1-70b-8n8g-tp4pp2-long-megatron.yaml b/examples/configs/recipes/llm/sft-llama3.1-70b-8n8g-tp4pp2-long-megatron.yaml
index aa009da464..bb43955812 100644
--- a/examples/configs/recipes/llm/sft-llama3.1-70b-8n8g-tp4pp2-long-megatron.yaml
+++ b/examples/configs/recipes/llm/sft-llama3.1-70b-8n8g-tp4pp2-long-megatron.yaml
@@ -43,12 +43,16 @@ policy:
       weight_decay: 0.01
       eps: 1.0e-08
 data:
-  dataset_name: openmathinstruct2
-  prompt_file: examples/prompts/math.txt
-  split: train_1M
   add_generation_prompt: true
-  output_key: generated_solution
-  seed: 42
+  train:
+    dataset_name: OpenMathInstruct-2
+    output_key: generated_solution
+    split: train_1M
+    split_validation_size: 0.05
+    seed: ${sft.seed}
+  validation: null
+  default:
+    prompt_file: examples/prompts/math.txt
 logger:
   monitor_gpus: false
   wandb:
diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-1n4g-fsdp2tp1-long.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-1n4g-fsdp2tp1-long.yaml
new file mode 100644
index 0000000000..a1494099f7
--- /dev/null
+++ b/examples/configs/recipes/llm/sft-llama3.1-8b-1n4g-fsdp2tp1-long.yaml
@@ -0,0 +1,10 @@
+defaults: ./sft-llama3.1-8b-1n8g-fsdp2tp1-long.yaml
+checkpointing:
+  checkpoint_dir: results/sft-llama3.1-8b-1n4g-fsdp2tp1-long
+logger:
+  log_dir: logs/sft-llama3.1-8b-1n4g-fsdp2tp1-long
+  wandb:
+    name: sft-llama3.1-8b-1n4g-fsdp2tp1-long
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-dynamicbatch.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-dynamicbatch.yaml
index 88d446283d..a5745e983d 100644
--- a/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-dynamicbatch.yaml
+++ b/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-dynamicbatch.yaml
@@ -28,18 +28,22 @@ policy:
       weight_decay: 0.01
       eps: 1.0e-08
 data:
-  dataset_name: openmathinstruct2
-  prompt_file: examples/prompts/math.txt
-  split: train_1M
   add_generation_prompt: true
-  output_key: generated_solution
-  seed: 42
+  train:
+    dataset_name: OpenMathInstruct-2
+    output_key: generated_solution
+    split: train_1M
+    split_validation_size: 0.05
+    seed: ${sft.seed}
+  validation: null
+  default:
+    prompt_file: examples/prompts/math.txt
 logger:
   log_dir: logs/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long
   wandb:
     project: nemo-rl
     name: sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long
   tensorboard:
-    log_dir: tb_logs-sft-dev-squad
+    log_dir: tb_logs-sft-dev-openmathinstruct2
 cluster:
   gpus_per_node: 8
diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-long.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-long.yaml
index 86db9da5e0..0e627a2aed 100644
--- a/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-long.yaml
+++ b/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-long.yaml
@@ -24,18 +24,22 @@ policy:
       weight_decay: 0.01
       eps: 1.0e-08
 data:
-  dataset_name: openmathinstruct2
-  prompt_file: examples/prompts/math.txt
-  split: train_1M
   add_generation_prompt: true
-  output_key: generated_solution
-  seed: 42
+  train:
+    dataset_name: OpenMathInstruct-2
+    output_key: generated_solution
+    split: train_1M
+    split_validation_size: 0.05
+    seed: ${sft.seed}
+  validation: null
+  default:
+    prompt_file: examples/prompts/math.txt
 logger:
   log_dir: logs/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long
   wandb:
     project: nemo-rl
     name: sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long
   tensorboard:
-    log_dir: tb_logs-sft-dev-squad
+    log_dir: tb_logs-sft-dev-openmathinstruct2
 cluster:
   gpus_per_node: 8
diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-lora.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-lora.yaml
index 784e4a02d5..093b22051a 100644
--- a/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-lora.yaml
+++ b/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-lora.yaml
@@ -26,9 +26,12 @@ policy:
       weight_decay: 0.01
       eps: 1.0e-08
 data:
-  dataset_name: tulu3_sft_mixture
   add_generation_prompt: true
-  seed: 42
+  train:
+    dataset_name: tulu3_sft_mixture
+    split_validation_size: 0.05
+    seed: ${sft.seed}
+  validation: null
 logger:
   log_dir: logs/sft-tmblog-llama3.1-8b
   tensorboard_enabled: false
diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-fsdp2tp2.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-fsdp2tp2.yaml
index 31b7538c1c..7d179fa103 100644
--- a/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-fsdp2tp2.yaml
+++ b/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-fsdp2tp2.yaml
@@ -22,12 +22,16 @@ policy:
       weight_decay: 0.01
       eps: 1.0e-08
 data:
-  dataset_name: openmathinstruct2
-  prompt_file: examples/prompts/math.txt
-  split: train_1M
   add_generation_prompt: true
-  output_key: generated_solution
-  seed: 42
+  train:
+    dataset_name: OpenMathInstruct-2
+    output_key: generated_solution
+    split: train_1M
+    split_validation_size: 0.05
+    seed: ${sft.seed}
+  validation: null
+  default:
+    prompt_file: examples/prompts/math.txt
 logger:
   log_dir: logs/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2
   wandb:
diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-megatron-lora.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-megatron-lora.yaml
index b6d9751a67..b2b76c0afd 100644
--- a/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-megatron-lora.yaml
+++ b/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-megatron-lora.yaml
@@ -29,9 +29,12 @@ policy:
   max_total_sequence_length: 4096
   make_sequence_length_divisible_by: 2
 data:
-  dataset_name: tulu3_sft_mixture
   add_generation_prompt: true
-  seed: 42
+  train:
+    dataset_name: tulu3_sft_mixture
+    split_validation_size: 0.05
+    seed: ${sft.seed}
+  validation: null
 logger:
   log_dir: logs/sft-tmblog-llama3.1-8b
   tensorboard_enabled: false
diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-megatron-seqpack.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-megatron-seqpack.yaml
index 3afca7ba02..aa62330e3e 100644
--- a/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-megatron-seqpack.yaml
+++ b/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-megatron-seqpack.yaml
@@ -30,12 +30,16 @@ policy:
     scheduler:
       lr_warmup_init: 1.9999e-65
 data:
-  dataset_name: openmathinstruct2
-  prompt_file: examples/prompts/math.txt
-  split: train_1M
   add_generation_prompt: true
-  output_key: generated_solution
-  seed: 42
+  train:
+    dataset_name: OpenMathInstruct-2
+    output_key: generated_solution
+    split: train_1M
+    split_validation_size: 0.05
+    seed: ${sft.seed}
+  validation: null
+  default:
+    prompt_file: examples/prompts/math.txt
 logger:
   log_dir: logs/sft-llama3.1-8b-1n8g-megatron
   wandb:
diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-megatron.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-megatron.yaml
index 2c08bef6f6..7e9452dff7 100644
--- a/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-megatron.yaml
+++ b/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-megatron.yaml
@@ -28,12 +28,16 @@ policy:
     scheduler:
       lr_warmup_init: 1.9999e-65
 data:
-  dataset_name: openmathinstruct2
-  prompt_file: examples/prompts/math.txt
-  split: train_1M
   add_generation_prompt: true
-  output_key: generated_solution
-  seed: 42
+  train:
+    dataset_name: OpenMathInstruct-2
+    output_key: generated_solution
+    split: train_1M
+    split_validation_size: 0.05
+    seed: ${sft.seed}
+  validation: null
+  default:
+    prompt_file: examples/prompts/math.txt
 logger:
   log_dir: logs/sft-llama3.1-8b-1n8g-megatron
   wandb:
diff --git a/examples/configs/recipes/llm/sft-llama3.2-1b-1n4g-fsdp2tp1.v3.yaml b/examples/configs/recipes/llm/sft-llama3.2-1b-1n4g-fsdp2tp1.v3.yaml
new file mode 100644
index 0000000000..ddcd49be1b
--- /dev/null
+++ b/examples/configs/recipes/llm/sft-llama3.2-1b-1n4g-fsdp2tp1.v3.yaml
@@ -0,0 +1,10 @@
+defaults: ./sft-llama3.2-1b-1n8g-fsdp2tp1.v3.yaml
+checkpointing:
+  checkpoint_dir: results/sft-llama3.2-1b-1n4g-fsdp2tp1
+logger:
+  log_dir: logs/sft-llama3.2-1b-1n4g-fsdp2tp1
+  wandb:
+    name: sft-llama3.2-1b-1n4g-fsdp2tp1
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v3.yaml b/examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v3.yaml
index 77ff8aac89..d7e56efda9 100644
--- a/examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v3.yaml
+++ b/examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v3.yaml
@@ -9,12 +9,16 @@ policy:
     name: meta-llama/Llama-3.2-1B
   make_sequence_length_divisible_by: 1
 data:
-  dataset_name: openmathinstruct2
-  prompt_file: examples/prompts/math.txt
-  split: train_1M
   add_generation_prompt: true
-  output_key: generated_solution
-  seed: 42
+  train:
+    dataset_name: OpenMathInstruct-2
+    output_key: generated_solution
+    split: train_1M
+    split_validation_size: 0.05
+    seed: ${sft.seed}
+  validation: null
+  default:
+    prompt_file: examples/prompts/math.txt
 logger:
   log_dir: logs/sft-llama3.2-1b-1n8g-fsdp2tp1
   wandb:
diff --git a/examples/configs/recipes/llm/sft-nanov3-30BA3B-2n4g-fsdp2-lora.yaml b/examples/configs/recipes/llm/sft-nanov3-30BA3B-2n4g-fsdp2-lora.yaml
new file mode 100644
index 0000000000..a774ab0689
--- /dev/null
+++ b/examples/configs/recipes/llm/sft-nanov3-30BA3B-2n4g-fsdp2-lora.yaml
@@ -0,0 +1,11 @@
+defaults: ./sft-nanov3-30BA3B-2n8g-fsdp2-lora.yaml
+logger:
+  wandb:
+    name: sft-nanov3-30BA3B-2n4g-fsdp2-lora
+  tensorboard:
+    log_dir: tb_logs-sft-nanov3-30BA3B-2n4g-fsdp2-lora
+  mlflow:
+    run_name: sft-nanov3-30BA3B-2n4g-fsdp2-lora
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/sft-nanov3-30BA3B-2n4g-fsdp2.yaml b/examples/configs/recipes/llm/sft-nanov3-30BA3B-2n4g-fsdp2.yaml
new file mode 100644
index 0000000000..9a3b333bcc
--- /dev/null
+++ b/examples/configs/recipes/llm/sft-nanov3-30BA3B-2n4g-fsdp2.yaml
@@ -0,0 +1,11 @@
+defaults: ./sft-nanov3-30BA3B-2n8g-fsdp2.yaml
+logger:
+  wandb:
+    name: sft-nanov3-30BA3B-2n4g-fsdp2
+  tensorboard:
+    log_dir: tb_logs-sft-nanov3-30BA3B-2n4g-fsdp2
+  mlflow:
+    run_name: sft-nanov3-30BA3B-2n4g-fsdp2
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/sft-nemotron-super-49b-8n8g-fsdp2tp4cp8-tulu-v3.yaml.disabled b/examples/configs/recipes/llm/sft-nemotron-super-49b-8n8g-fsdp2tp4cp8-tulu-v3.yaml.disabled
index d224a6d51f..1bf5502c21 100644
--- a/examples/configs/recipes/llm/sft-nemotron-super-49b-8n8g-fsdp2tp4cp8-tulu-v3.yaml.disabled
+++ b/examples/configs/recipes/llm/sft-nemotron-super-49b-8n8g-fsdp2tp4cp8-tulu-v3.yaml.disabled
@@ -44,9 +44,12 @@ policy:
   - milestones:
     - 10
 data:
-  dataset_name: tulu3_sft_mixture
   num_workers: 20
-  test_size: 0.05
+  train:
+    dataset_name: tulu3_sft_mixture
+    split_validation_size: 0.05
+    seed: ${sft.seed}
+  validation: null
 logger:
   tensorboard_enabled: false
   monitor_gpus: false
diff --git a/examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v3.yaml b/examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v3.yaml
index c94683c61f..8373a788ff 100644
--- a/examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v3.yaml
+++ b/examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v3.yaml
@@ -15,11 +15,16 @@ policy:
     tensor_parallel_size: 8
   make_sequence_length_divisible_by: 8
 data:
-  dataset_name: openmathinstruct2
-  prompt_file: examples/prompts/math.txt
-  split: train_1M
   add_generation_prompt: true
-  output_key: generated_solution
+  train:
+    dataset_name: OpenMathInstruct-2
+    output_key: generated_solution
+    split: train_1M
+    split_validation_size: 0.05
+    seed: ${sft.seed}
+  validation: null
+  default:
+    prompt_file: examples/prompts/math.txt
 logger:
   log_dir: logs/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt
   wandb:
diff --git a/examples/configs/recipes/llm/sft-qwen2.5-math7b-2n4g-megatron.yaml b/examples/configs/recipes/llm/sft-qwen2.5-math7b-2n4g-megatron.yaml
new file mode 100644
index 0000000000..aad3f5c8e0
--- /dev/null
+++ b/examples/configs/recipes/llm/sft-qwen2.5-math7b-2n4g-megatron.yaml
@@ -0,0 +1,15 @@
+defaults: ./sft-qwen2.5-math7b-2n8g-megatron.yaml
+policy:
+  megatron_cfg:
+    tensor_model_parallel_size: 2
+    context_parallel_size: 1
+logger:
+  wandb:
+    name: sft-qwen2.5-math7b-2n4g-megatron
+  tensorboard:
+    log_dir: tb_logs-sft-qwen2.5-math7b-2n4g-megatron
+  mlflow:
+    run_name: sft-qwen2.5-math7b-2n4g-megatron
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/llm/sft-qwen2.5-math7b-2n8g-megatron.yaml b/examples/configs/recipes/llm/sft-qwen2.5-math7b-2n8g-megatron.yaml
index 299e426084..d3bdd77bb2 100644
--- a/examples/configs/recipes/llm/sft-qwen2.5-math7b-2n8g-megatron.yaml
+++ b/examples/configs/recipes/llm/sft-qwen2.5-math7b-2n8g-megatron.yaml
@@ -33,12 +33,17 @@ policy:
     enabled: true
   make_sequence_length_divisible_by: 32
 data:
-  dataset_name: openmathinstruct2
-  prompt_file: examples/prompts/math.txt
-  split: train_1M
   add_generation_prompt: true
-  output_key: generated_solution
   num_workers: 8
+  train:
+    dataset_name: OpenMathInstruct-2
+    output_key: generated_solution
+    split: train_1M
+    split_validation_size: 0.05
+    seed: ${sft.seed}
+  validation: null
+  default:
+    prompt_file: examples/prompts/math.txt
 logger:
   wandb:
     project: nemo-rl
diff --git a/examples/configs/recipes/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n4g-dtensor2tp1.v1.yaml b/examples/configs/recipes/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n4g-dtensor2tp1.v1.yaml
new file mode 100644
index 0000000000..ef2c181f5e
--- /dev/null
+++ b/examples/configs/recipes/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n4g-dtensor2tp1.v1.yaml
@@ -0,0 +1,10 @@
+defaults: ./vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n8g-dtensor2tp1.v1.yaml
+checkpointing:
+  checkpoint_dir: results/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n4g-dtensor2tp1
+logger:
+  log_dir: logs/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n4g-dtensor2tp1
+  wandb:
+    name: vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n4g-dtensor2tp1
+cluster:
+  gpus_per_node: 4
+
diff --git a/examples/configs/recipes/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n4g-megatrontp1.v1.yaml b/examples/configs/recipes/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n4g-megatrontp1.v1.yaml
new file mode 100644
index 0000000000..fb70eedb20
--- /dev/null
+++ b/examples/configs/recipes/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n4g-megatrontp1.v1.yaml
@@ -0,0 +1,10 @@
+defaults: ./vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n8g-megatrontp2.v1.yaml
+policy:
+  make_sequence_length_divisible_by: 1
+checkpointing:
+  checkpoint_dir: results/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n4g-megatrontp1
+logger:
+  wandb:
+    name: vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n4g-megatrontp1
+cluster:
+  gpus_per_node: 4
diff --git a/examples/configs/sft.yaml b/examples/configs/sft.yaml
index 747564f422..728b1c47ff 100644
--- a/examples/configs/sft.yaml
+++ b/examples/configs/sft.yaml
@@ -113,6 +113,9 @@ policy:
     bias_activation_fusion: True
     defer_fp32_logits: False
     moe_per_layer_logging: False
+    moe_enable_deepep: false
+    moe_token_dispatcher_type: "allgather"
+    moe_shared_expert_overlap: false
 
     peft:
       enabled: false
@@ -179,24 +182,36 @@ data:
   shuffle: true
   num_workers: 1
 
-  dataset_name: "squad"
+  # dataset
+  train:
+    dataset_name: "squad"
+    split: "train"
+  validation:
+    dataset_name: "squad"
+    split: "validation"
+  # default settings for all datasets
+  default:
+    prompt_file: null
+    system_prompt_file: null
+    processor: "sft_processor"
   # You can use custom response datasets for training and validation. For example:
-  #   data:
-  #     dataset_name: ResponseDataset
-  #     train_data_path: <PathToTrainingDataset>  # e.g., /path/to/local/dataset.jsonl or hf_org/hf_dataset_name (HuggingFace)
-  #     val_data_path: <PathToValidationDataset>
-  #     input_key: <QuestionKey>, default is "input"
-  #     output_key: <AnswerKey>, default is "output"
-  #     train_split: <TrainSplit>, default is None  # used for HuggingFace datasets
-  #     val_split: <ValSplit>, default is None  # used for HuggingFace datasets
+  # train:
+  #   # this dataset will override input_key and use the default values for other vars
+  #   data_path: /path/to/local/train_dataset.jsonl
+  #   input_key: question
+  # validation:
+  #   # this dataset will use the default values for other vars except data_path
+  #   data_path: /path/to/local/val_dataset.jsonl
+  # default:
+  #   # the vars below are used as default values when a dataset doesn't specify them
+  #   dataset_name: ResponseDataset
+  #   input_key: input
+  #   output_key: output
+  #   prompt_file: null
+  #   system_prompt_file: null
+  #   processor: "sft_processor"
   # See https://github.com/NVIDIA-NeMo/RL/blob/main/docs/guides/sft.md#datasets for more details.
 
-  ## unused with squad dataset
-  prompt_file: null
-  split: null
-  output_key: null
-  seed: null
-
 
   ## OpenAI format specific configs
   # train_data_path: "/path/to/train.jsonl"  # Path to training data
@@ -216,15 +231,15 @@ logger:
   monitor_gpus: true  # If true, will monitor GPU usage and log to wandb and/or tensorboard
   wandb:
     project: "sft-dev"
-    name: "sft-dev-${data.dataset_name}"
+    name: "sft-dev-${data.train.dataset_name}"
   swanlab:
     project: "sft-dev"
-    name: "sft-dev-${data.dataset_name}"
+    name: "sft-dev-${data.train.dataset_name}"
   tensorboard:
-    log_dir: "tb_logs-sft-dev-${data.dataset_name}"
+    log_dir: "tb_logs-sft-dev-${data.train.dataset_name}"
   mlflow:
     experiment_name: "sft-dev"
-    run_name: "sft-dev-${data.dataset_name}"
+    run_name: "sft-dev-${data.train.dataset_name}"
   gpu_monitoring:
     collection_interval: 10  # How often to collect GPU usage metrics (in seconds)
     flush_interval: 10  # How often to flush GPU usage metrics to the loggers (in seconds)
diff --git a/examples/configs/sft_openmathinstruct2.yaml b/examples/configs/sft_openmathinstruct2.yaml
index 25368f7df5..fee5e7a06d 100644
--- a/examples/configs/sft_openmathinstruct2.yaml
+++ b/examples/configs/sft_openmathinstruct2.yaml
@@ -69,15 +69,23 @@ policy:
 
 data:
   max_input_seq_length: ${policy.max_total_sequence_length}
-  dataset_name: "openmathinstruct2"
-  prompt_file: examples/prompts/math.txt
-  split: "train_1M"
   add_bos: true
   add_eos: true
   add_generation_prompt: true
-  output_key: 'generated_solution'
   shuffle: true
 
+  # dataset
+  train:
+    dataset_name: OpenMathInstruct-2
+    output_key: generated_solution
+    split: train_1M
+    split_validation_size: 0.05 # use 5% of the training data as validation data
+    seed: ${sft.seed} # seed for train/validation split when split_validation_size > 0
+  validation: null
+  # default settings for all datasets
+  default:
+    prompt_file: examples/prompts/math.txt
+
 logger:
   log_dir: "logs"  # Base directory for all logs
   wandb_enabled: true # Make sure you do a ``wandb login [Your API key]'' before running
diff --git a/examples/configs/sft_openmathinstruct2_megatron.yaml b/examples/configs/sft_openmathinstruct2_megatron.yaml
index b0f94fff6d..18aba3597b 100644
--- a/examples/configs/sft_openmathinstruct2_megatron.yaml
+++ b/examples/configs/sft_openmathinstruct2_megatron.yaml
@@ -92,6 +92,9 @@ policy:
     # gives ~25% training perf speedup with sequence packing and apply_rope_fusion
     bias_activation_fusion: True
     moe_per_layer_logging: False
+    moe_enable_deepep: false
+    moe_token_dispatcher_type: "allgather"
+    moe_shared_expert_overlap: false
 
     env_vars:
       PYTORCH_CUDA_ALLOC_CONF: "expandable_segments:False"
@@ -125,14 +128,6 @@ policy:
   optimizer: null
 
 data:
-  max_input_seq_length: ${policy.max_total_sequence_length}
-  dataset_name: "openmathinstruct2"
-  prompt_file: examples/prompts/math.txt
-  split: "train_1M"
-  add_bos: true
-  add_eos: true
-  add_generation_prompt: true
-  output_key: 'generated_solution'
   num_workers: 1
 
 logger:
diff --git a/examples/configs/sft_vlm_3B.yaml b/examples/configs/sft_vlm_3B.yaml
index 5615e2f99d..b67a0d2087 100644
--- a/examples/configs/sft_vlm_3B.yaml
+++ b/examples/configs/sft_vlm_3B.yaml
@@ -23,12 +23,20 @@ checkpointing:
 
 data:
   max_input_seq_length: ${policy.max_total_sequence_length}
-  dataset_name: "clevr_cogent"
   add_bos: true
   add_eos: true
   add_generation_prompt: false
-  split: trainA
-  prompt_file: null
+
+  # dataset
+  train:
+    dataset_name: clevr-cogent
+    split: train
+  validation:
+    dataset_name: clevr-cogent
+    split: valA
+  # default settings for all datasets
+  default:
+    prompt_file: null
 
 logger:
   log_dir: "logs"  # Base directory for all logs
@@ -37,9 +45,9 @@ logger:
   monitor_gpus: true  # If true, will monitor GPU usage and log to wandb and/or tensorboard
   wandb:
     project: "sft-dev"
-    name: "sft-dev-${data.dataset_name}"
+    name: "sft-dev-${data.train.dataset_name}"
   tensorboard:
-    log_dir: "tb_logs-sft-dev-${data.dataset_name}"
+    log_dir: "tb_logs-sft-dev-${data.train.dataset_name}"
   gpu_monitoring:
     collection_interval: 10  # How often to collect GPU usage metrics (in seconds)
     flush_interval: 10  # How often to flush GPU usage metrics to the loggers (in seconds)
diff --git a/examples/configs/vlm_grpo_3B.yaml b/examples/configs/vlm_grpo_3B.yaml
index 47233d87db..9eb3ea9d59 100644
--- a/examples/configs/vlm_grpo_3B.yaml
+++ b/examples/configs/vlm_grpo_3B.yaml
@@ -104,6 +104,9 @@ policy:
     bias_activation_fusion: True
     defer_fp32_logits: False
     moe_per_layer_logging: False
+    moe_enable_deepep: false
+    moe_token_dispatcher_type: "allgather"
+    moe_shared_expert_overlap: false
 
     optimizer:
       optimizer: "adam"
@@ -228,14 +231,23 @@ policy:
 
 data:
   max_input_seq_length: ${policy.max_total_sequence_length} # upper bound, real truncation occurs at vllm.max_model_len
-  prompt_file: "examples/prompts/clevr_cogent_cot.txt"
-  system_prompt_file: null
-  dataset_name: "clevr-cogent"
-  env_name: "clevr-cogent"
-  split: "trainA"
   shuffle: true
   num_workers: 1
 
+  # dataset
+  train:
+    dataset_name: clevr-cogent
+    split: train
+  validation:
+    dataset_name: clevr-cogent
+    split: valA
+  # default settings for all datasets
+  default:
+    prompt_file: examples/prompts/clevr_cogent_cot.txt
+    system_prompt_file: null
+    processor: "vlm_hf_data_processor"
+    env_name: "clevr-cogent"
+
 env:
   clevr-cogent:
     num_workers: 8
diff --git a/examples/configs/vlm_grpo_3B_megatron.yaml b/examples/configs/vlm_grpo_3B_megatron.yaml
index 64f8ea158d..8bb4fb30e1 100644
--- a/examples/configs/vlm_grpo_3B_megatron.yaml
+++ b/examples/configs/vlm_grpo_3B_megatron.yaml
@@ -146,6 +146,9 @@ policy:
     bias_activation_fusion: True
     defer_fp32_logits: False
     moe_per_layer_logging: False
+    moe_enable_deepep: false
+    moe_token_dispatcher_type: "allgather"
+    moe_shared_expert_overlap: false
     optimizer:
       optimizer: adam
       lr: 2.0e-07
@@ -180,13 +183,21 @@ policy:
       data_parallel_sharding_strategy: optim_grads_params
 data:
   max_input_seq_length: ${policy.max_total_sequence_length}
-  prompt_file: examples/prompts/clevr_cogent_cot.txt
-  system_prompt_file: null
-  dataset_name: clevr-cogent
-  env_name: "clevr-cogent"
-  split: trainA
   shuffle: true
   num_workers: 1
+  # dataset
+  train:
+    dataset_name: clevr-cogent
+    split: train
+  validation:
+    dataset_name: clevr-cogent
+    split: valA
+  # default settings for all datasets
+  default:
+    prompt_file: examples/prompts/clevr_cogent_cot.txt
+    system_prompt_file: null
+    processor: "vlm_hf_data_processor"
+    env_name: "clevr-cogent"
 env:
   clevr-cogent:
     num_workers: 8
diff --git a/examples/nemo_gym/grpo_dapo17k_bytedtsinghua_qwen3_4binstruct_nf.yaml b/examples/nemo_gym/grpo_dapo17k_bytedtsinghua_qwen3_4binstruct_nf.yaml
deleted file mode 100644
index d6d550a12c..0000000000
--- a/examples/nemo_gym/grpo_dapo17k_bytedtsinghua_qwen3_4binstruct_nf.yaml
+++ /dev/null
@@ -1,278 +0,0 @@
-grpo:
-  max_num_epochs: 1
-  num_prompts_per_step: 64
-  num_generations_per_prompt: 16
-  max_rollout_turns: 1 # for multi-turn rollouts. Math Environments just have 1 turn (answering the question)
-  max_num_steps: 1000000
-  normalize_rewards: true
-  use_leave_one_out_baseline: true
-  val_period: 10
-  val_at_start: true
-  overlong_filtering: false
-  max_val_samples: null   # inferred from size of val dataset. for multi evals, repeat val ds via `num_repeats` in `ng_prepare_data`.
-  val_batch_size: null
-  seed: 42
-  use_dynamic_sampling: false
-  dynamic_sampling_max_gen_batches: 10
-  batch_multiplier: 1
-  reward_shaping:
-    enabled: false
-    overlong_buffer_length: 128
-    overlong_buffer_penalty: 1
-    max_response_length: ${policy.max_total_sequence_length}
-  reward_scaling:
-    enabled: false
-    source_min: 0.0
-    source_max: 1.0
-    target_min: 0.0
-    target_max: 1.0
-  skip_reference_policy_logprobs_calculation: true
-
-loss_fn:
-  reference_policy_kl_penalty: 0
-  reference_policy_kl_type: "k3"
-  kl_input_clamp_value: 20.0
-  kl_output_clamp_value: 10.0
-  ratio_clip_min: 0.2
-  ratio_clip_max: 0.2
-  ratio_clip_c: null
-  # (default off) loss formulation improvements (docs/guides/grpo.md#loss)
-  use_on_policy_kl_approximation: false
-  truncated_importance_sampling_ratio: null
-  use_importance_sampling_correction: false
-  token_level_loss: true
-  force_on_policy_ratio: false  # Set to true to force ratio=1.0 (requires train_global_batch_size == num_prompts_per_step * num_generations_per_prompt)
-
-checkpointing:
-  enabled: true
-  checkpoint_dir: "results/grpo"
-  metric_name: "val:accuracy"
-  higher_is_better: true
-  keep_top_k: 3
-  save_period: 1
-  checkpoint_must_save_by: null
-
-policy:
-  model_name: "Qwen/Qwen3-4B-Instruct-2507"
-  tokenizer:
-    name: ${policy.model_name} ## specify if you'd like to use a tokenizer different from the model's default
-    chat_template_kwargs: null # can be used to pass kwargs to the chat template, e.g., enable_thinking=true
-  hf_config_overrides: {}
-  train_global_batch_size: ${mul:${grpo.num_prompts_per_step}, ${grpo.num_generations_per_prompt}}  # Match the total rollouts per step
-  train_micro_batch_size: 1
-  logprob_batch_size: 1
-  generation_batch_size: 32 # Only used when generating using HF backend
-  max_total_sequence_length: 32768
-  precision: "bfloat16"
-  logprob_chunk_size: 1024
-
-  dtensor_cfg:
-    _v2: false
-    enabled: true
-    cpu_offload: False
-    sequence_parallel: false
-    activation_checkpointing: true
-    tensor_parallel_size: 2
-    context_parallel_size: 1
-    custom_parallel_plan: null
-    clear_cache_every_n_steps: null
-  
-  megatron_cfg:
-    enabled: false
-    # We might want to consider setting this value higher (e.g. to 1) and raising the vllm generation max mem utilization
-    empty_unused_memory_level: 0
-    activation_checkpointing: true
-    converter_type: "Qwen2ForCausalLM"  # Apparently this is comptible with Qwen 3 dense models.
-    tensor_model_parallel_size: 1
-    expert_tensor_parallel_size: 1
-    expert_model_parallel_size: 1
-    pipeline_model_parallel_size: 1
-    num_layers_in_first_pipeline_stage: null
-    num_layers_in_last_pipeline_stage: null
-    context_parallel_size: 1
-    pipeline_dtype: ${policy.precision}
-    sequence_parallel: false
-    freeze_moe_router: true
-    moe_router_dtype: "fp64"
-    moe_router_load_balancing_type: "none" # "seq_aux_loss" causes logprob error divergence for grpo
-    moe_router_bias_update_rate: 0.0 # by default, disable bias updates for grpo
-    #gives ~20% training perf speedup with sequence packing
-    apply_rope_fusion: True
-    defer_fp32_logits: true
-    moe_permute_fusion: false
-    bias_activation_fusion: True
-    moe_per_layer_logging: False
-
-    optimizer:
-      optimizer: "adam"
-      lr: 5.0e-6
-      min_lr: 5.0e-7
-      weight_decay: 0.01
-      bf16: true
-      fp16: false
-      params_dtype: "float32"
-
-      #adam
-      adam_beta1: 0.9
-      adam_beta2: 0.999
-      adam_eps: 1e-8
-
-      #sgd
-      sgd_momentum: 0.9
-
-      #distributed optimizer
-      use_distributed_optimizer: true
-      use_precision_aware_optimizer: true
-
-      # optimizer cpu offload
-      optimizer_cpu_offload: false
-      optimizer_offload_fraction: 0.0
-
-      clip_grad: ${policy.max_grad_norm}
-
-    scheduler:
-      start_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay}
-      end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay}
-      weight_decay_incr_style: "constant"
-      lr_decay_style: "constant"
-      lr_decay_iters: null
-      lr_warmup_iters: 13
-      lr_warmup_init: 5.0e-7
-
-    distributed_data_parallel_config:
-      grad_reduce_in_fp32: false
-      overlap_grad_reduce: true
-      overlap_param_gather: true
-      use_custom_fsdp: false
-      data_parallel_sharding_strategy: "optim_grads_params"
-
-    env_vars: null
-
-  # See docs/design-docs/sequence-packing-and-dynamic-batching.md 
-  # for more details on dynamic batching and sequence packing.
-  dynamic_batching:
-    enabled: False
-    train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}}
-    logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}}
-    sequence_length_round: 64
-
-  sequence_packing:
-    enabled: false
-    train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}}
-    logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}}
-    algorithm: "modified_first_fit_decreasing"
-    sequence_length_round: 64
-
-  # makes the training sequence length divisible by the tensor parallel size
-  # this is useful for sequence parallel training
-  make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size}
-  max_grad_norm: 1.0
-
-  optimizer:
-    name: "torch.optim.AdamW"
-    kwargs:
-      lr: 1.0e-6
-      weight_decay: 0.01
-      betas: [0.9, 0.999]
-      eps: 1e-8
-      # when using Dtensor, we need to set foreach
-      # and fused to False
-      foreach: False
-      fused: False
-
-  scheduler:
-    - name: "torch.optim.lr_scheduler.ConstantLR"
-      kwargs:
-        factor: 1.0
-        total_iters: 10000000000
-    - milestones: []
-
-  generation:
-    backend: "vllm"
-    max_new_tokens: ${policy.max_total_sequence_length}
-    temperature: 1.0
-    top_p: 1.0
-    top_k: null
-    stop_token_ids: null
-    stop_strings: null
-    vllm_cfg:
-      async_engine: true
-      precision: ${policy.precision}
-      tensor_parallel_size: 1
-      pipeline_parallel_size: 1
-      enable_expert_parallel: false
-      expert_parallel_size: 1
-      gpu_memory_utilization: 0.8
-      max_model_len: ${policy.max_total_sequence_length}
-      enforce_eager: false
-      use_deep_gemm: False
-      num_last_layers_in_bf16: 0
-      num_first_layers_in_bf16: 0
-      expose_http_server: true
-      skip_tokenizer_init: false
-      http_server_serving_chat_kwargs:
-        # This is the tool parser for Qwen 3 4B Instruct. This needs to be changed for other models.
-        enable_auto_tools: true
-        tool_parser: hermes
-        # Enable the appropriate reasoning parser here. Since this model is an instruct model, we comment it out.
-        # reasoning_parser: deepseek_r1
-    vllm_kwargs:
-      compilation_config:
-        # when enforce_eager is False, set ++policy.generation.vllm_kwargs.compilation_config.use_inductor=False for better accuracy,
-        # with the flag, vllm will use the custom CUDA kernels instead of the Triton kernels generated by torch.compile
-        # for more details, see convergence issue https://github.com/NVIDIA-NeMo/RL/issues/998
-        use_inductor: False
-    colocated:
-      # true: generation shares training GPUs
-      # false: uses dedicated generation resources
-      enabled: true
-      # only relevant when enabled is false
-      resources:
-        gpus_per_node: null # Decides num gpus to be dedicated to generation when there is one node in the cluster i.e cluster.num_nodes == 1
-        num_nodes: null # Decides number of nodes to be dedicated to generation
-
-data:
-  train_jsonl_fpath: 3rdparty/Gym-workspace/Gym/data/bytedtsinghua_dapo17k/train.jsonl
-  validation_jsonl_fpath: 3rdparty/Gym-workspace/Gym/data/bytedtsinghua_dapo17k/validation.jsonl
-  shuffle: true
-  num_workers: 0
-
-env:
-  should_use_nemo_gym: true
-  should_log_nemo_gym_responses: true  # If you have low logging storage, set this to false
-  nemo_gym:  # This is passed into NeMo-Gym as the initial_global_config_dict
-    config_paths:
-    - responses_api_models/vllm_model/configs/vllm_model_for_training.yaml  # Required! And it must be *for_training
-    - resources_servers/library_judge_math/configs/library_judge_math.yaml
-    library_judge_math:
-      resources_servers:
-        library_judge_math:
-          judge_model_server:
-            name: policy_model
-          should_use_judge: false
-
-logger:
-  log_dir: "logs"  # Base directory for all logs
-  num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal
-  wandb_enabled: true
-  tensorboard_enabled: false
-  mlflow_enabled: false  # Disable MLflow logging
-  swanlab_enabled: false
-  monitor_gpus: true  # If true, will monitor GPU usage and log to wandb and/or tensorboard
-  wandb:
-    project: "grpo-dev"
-    name: "grpo-dev-logger"
-  swanlab:
-    project: "grpo-dev"
-    name: "grpo-dev-logger"
-  tensorboard: {}
-  mlflow:
-    experiment_name: "grpo-dev"
-    run_name: "grpo-dev-logger"
-  gpu_monitoring:
-    collection_interval: 10  # How often to collect GPU usage metrics (in seconds)
-    flush_interval: 10  # How often to flush GPU usage metrics to the loggers (in seconds)
-
-cluster:
-  gpus_per_node: 8
-  num_nodes: 8
diff --git a/examples/nemo_gym/grpo_qwen3_30ba3b_instruct.yaml b/examples/nemo_gym/grpo_qwen3_30ba3b_instruct.yaml
new file mode 100644
index 0000000000..da7a392dae
--- /dev/null
+++ b/examples/nemo_gym/grpo_qwen3_30ba3b_instruct.yaml
@@ -0,0 +1,155 @@
+defaults: "grpo_workplace_assistant_nemotron_nano_v2_9b.yaml"
+
+grpo:
+  max_num_epochs: 10
+  # We observe MoE likes more data per optimization step. Here we increase the num prompts per step from the dense 64 to 256.
+  # We retain the 16 generations per prompt for now. Later on this may change for agentic tasks as the action/sample space grows.
+  # We take up to 16 steps off policy, which is 256 * 16 = 4096
+  num_prompts_per_step: 4096
+  num_generations_per_prompt: 16
+  # Each "step" in NeMo RL is all 16 minibatch steps we want to take. So we val and save every step.
+  val_period: 1
+  # The advantages are much larger and this becomes non-trivially expensive.
+  calculate_advantages_on_gpu: true
+
+# We use GSPO rather than GRPO for MoE models
+loss_fn:
+  reference_policy_kl_penalty: 0
+  ratio_clip_min: 3e-4
+  ratio_clip_max: 3e-4
+  ratio_clip_c: null
+  use_on_policy_kl_approximation: false
+  # We observe importance sampling correction here to have very interesting effects on training dynamics. Usually it is fine to leave this off.
+  use_importance_sampling_correction: false
+  # sequence_level_importance_ratios turns GRPO -> GSPO
+  sequence_level_importance_ratios: true
+  # As of Mon Oct 13, token level loss as formulated in the GSPO paper is not yet supported in NeMo RL.
+  token_level_loss: false
+
+policy:
+  model_name: Qwen/Qwen3-30B-A3B-Instruct-2507
+  logprob_chunk_size: null
+
+  # This is one minibatch, which is 256 prompts per step * 16 rollouts per prompt.
+  train_global_batch_size: 4096
+
+  # max_total_sequence_length goes up to 131072, but we default to 32768 to make training more efficient for this instruct model which doesn't initially have such long output length.
+  max_total_sequence_length: 32768
+
+  # Don't use dtensor. optimizer and scheduler are dtensor only (mcore has its own version of these)
+  dtensor_cfg:
+    enabled: False
+  optimizer: null
+  scheduler: null
+
+  # As of Thu Oct 02, 2025, we need sequence packing enabled to use context parallelism (CP) in mcore.
+  sequence_packing:
+    enabled: true
+    train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}}
+    logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}}
+    algorithm: "modified_first_fit_decreasing"
+    sequence_length_round: 64
+
+  generation:
+    vllm_cfg:
+      tensor_parallel_size: 4
+      # This is a very low GPU mem utilization. We GPU OOM in two places:
+      # Refit after train, refit before validation.
+      gpu_memory_utilization: 0.7
+
+      http_server_serving_chat_kwargs:
+        # This is the tool parser for Qwen 3 30B A3B Instruct. This needs to be changed for other models.
+        enable_auto_tools: true
+        tool_parser: hermes
+        # Enable the appropriate reasoning parser here. Since this model is an instruct model, we comment it out.
+        # reasoning_parser: deepseek_r1
+
+  # Needs to be set to whatever backend TP size.
+  make_sequence_length_divisible_by: ${policy.megatron_cfg.tensor_model_parallel_size}
+
+  megatron_cfg:
+    enabled: true
+    empty_unused_memory_level: 1
+    activation_checkpointing: true
+    converter_type: "LlamaForCausalLM"  # This arg is deprecated, and we can set it to anything.
+    tensor_model_parallel_size: 4
+    expert_tensor_parallel_size: 1
+    # We set this to 8, the number of GPUs in one node
+    expert_model_parallel_size: 8
+    pipeline_model_parallel_size: 1
+    num_layers_in_first_pipeline_stage: null
+    num_layers_in_last_pipeline_stage: null
+    # The context parallel size times the tensor model parallel size should equal 8.
+    context_parallel_size: 2
+    pipeline_dtype: ${policy.precision}
+    # Sequence parallel is required for expert parallel
+    sequence_parallel: true
+    # Freezing the MoE router and using fp64 reportedly stabilizes training (possibly related to some
+    # refit issues). NOTE: this config instead leaves the router unfrozen and uses fp32 (see below).
+    freeze_moe_router: false
+    moe_router_dtype: fp32
+    moe_router_load_balancing_type: none # "seq_aux_loss" causes logprob error divergence for grpo. As of Jan 06, 2025, global_aux_loss is also not supported properly in mcore.
+    moe_router_bias_update_rate: 0.0 # by default, disable bias updates for grpo
+    # apply_rope_fusion (below) gives ~20% training perf speedup with sequence packing
+    moe_permute_fusion: true
+    apply_rope_fusion: True
+    # gives ~25% training perf speedup with sequence packing and apply_rope_fusion
+    bias_activation_fusion: True
+    defer_fp32_logits: true
+    moe_per_layer_logging: true
+
+    optimizer:
+      optimizer: "adam"
+      # As of Mon Oct 13, we default to 2e-6 here, but it's possible this value may increase/decrease depending on our subsequent observations.
+      lr: 2.0e-6
+      min_lr: ${policy.megatron_cfg.optimizer.lr}
+      weight_decay: 0.01
+      bf16: true
+      fp16: false
+      params_dtype: "float32"
+
+      #adam
+      adam_beta1: 0.9
+      adam_beta2: 0.999
+      adam_eps: 1e-8
+
+      #sgd
+      sgd_momentum: 0.9
+
+      #distributed optimizer
+      use_distributed_optimizer: true
+      use_precision_aware_optimizer: true
+
+      clip_grad: ${policy.max_grad_norm}
+
+      # optimizer cpu offload
+      optimizer_cpu_offload: false
+      optimizer_offload_fraction: 0.0
+
+    scheduler:
+      start_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay}
+      end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay}
+      weight_decay_incr_style: "constant"
+      lr_decay_style: "constant"
+      lr_decay_iters: null
+      lr_warmup_iters: 0
+      lr_warmup_init: ${policy.megatron_cfg.optimizer.lr}
+
+    distributed_data_parallel_config:
+      grad_reduce_in_fp32: false
+      overlap_grad_reduce: true
+      overlap_param_gather: true
+      use_custom_fsdp: false
+      data_parallel_sharding_strategy: "optim_grads_params"
+
+    env_vars: null
+
+checkpointing:
+  # This assumes a slurm job timeout of 4 hours.
+  # 1. It will usually take 10-15 minutes to spin up the training job and for the timeout iterations to start.
+  # 2. The next step may also be a validation step which takes extra long.
+  #     1. For this config Qwen 3 30BA3B on math with 32k context length, the validation could take up to 10 mins.
+  # 3. The step time for this config on 32 nodes takes around 30 mins.
+  # 4. The checkpoint time for this model is around 10 mins.
+  checkpoint_must_save_by: "00:03:30:00"
+  save_period: 1
diff --git a/examples/nemo_gym/grpo_workplace_assistant_nemotron_nano_v2_9b.yaml b/examples/nemo_gym/grpo_workplace_assistant_nemotron_nano_v2_9b.yaml
index dea76e41cf..ae3740850d 100644
--- a/examples/nemo_gym/grpo_workplace_assistant_nemotron_nano_v2_9b.yaml
+++ b/examples/nemo_gym/grpo_workplace_assistant_nemotron_nano_v2_9b.yaml
@@ -100,6 +100,9 @@ policy:
     apply_rope_fusion: True
     defer_fp32_logits: false
     moe_permute_fusion: false
+    moe_enable_deepep: false
+    moe_token_dispatcher_type: "allgather"
+    moe_shared_expert_overlap: false
 
     optimizer:
       optimizer: "adam"
@@ -201,7 +204,7 @@ policy:
       kv_cache_dtype: "auto"
       expose_http_server: true
       skip_tokenizer_init: false
-      tool_parser_plugin: ???
+      # tool_parser_plugin: ???  # This is set to the path for Nemotron Nano v2
       http_server_serving_chat_kwargs:
         # Workplace assistant uses 26 tools, so we enable auto_tools.
         # For Nemotron Nano v2, we use the dedicated `nemotron_json` tool parser
@@ -227,9 +230,8 @@ policy:
 data:
   # Using the prepared train and validation datasets (downloaded from HuggingFace and split 90/10)
   # Train: 1129 samples, Validation: 126 samples
-  train_jsonl_fpath: 3rdparty/Gym-workspace/Gym/resources_servers/workplace_assistant/data/train.jsonl
-  validation_jsonl_fpath: 3rdparty/Gym-workspace/Gym/resources_servers/workplace_assistant/data/validation.jsonl
-  agent_name: workplace_assistant_simple_agent
+  train_jsonl_fpath: 3rdparty/Gym-workspace/Gym/data/workplace_assistant/train.jsonl
+  validation_jsonl_fpath: 3rdparty/Gym-workspace/Gym/data/workplace_assistant/validation.jsonl
   shuffle: true
   num_workers: 0
 
@@ -237,13 +239,16 @@ env:
   should_use_nemo_gym: true
   should_log_nemo_gym_responses: true  # If you have low logging storage, set this to false
   nemo_gym:  # This is passed into NeMo-Gym as the initial_global_config_dict
+    is_trajectory_collection: false  # Set this to true to enable trajectory collection (no training). You may also want to increase `policy.generation.vllm_cfg.gpu_memory_utilization`
     config_paths:
     - responses_api_models/vllm_model/configs/vllm_model_for_training.yaml  # Required! And it must be *for_training
     - resources_servers/workplace_assistant/configs/workplace_assistant.yaml
-    workplace_assistant_simple_agent:
-      responses_api_agents:
-        simple_agent:
-          max_steps: 6  # Workplace assistant allows up to 6 tool-calling steps per task
+    # You can uncomment these during `ng_prepare_data` and here to train on multiple environments at once!
+    # - resources_servers/math_with_judge/configs/math_with_judge.yaml
+    # - resources_servers/code_gen/configs/code_gen.yaml
+    # - resources_servers/mcqa/configs/mcqa.yaml
+    # - resources_servers/instruction_following/configs/instruction_following.yaml
+    # - resources_servers/structured_outputs/configs/structured_outputs_json.yaml
     policy_model:
       responses_api_models:
         vllm_model:
@@ -252,6 +257,10 @@ env:
           extra_body:
             chat_template_kwargs:
               enable_thinking: false
+    code_gen:
+      resources_servers:
+        code_gen:
+          num_processes: ${mul:64, ${cluster.num_nodes}}
 
 logger:
   log_dir: "logs/grpo-workplace-assistant-nemotron-nano-v2-9b"  # Base directory for all logs
diff --git a/examples/nemo_gym/launch_nemo_gym_multinode_training.sh b/examples/nemo_gym/launch_nemo_gym_multinode_training.sh
index 37ede71772..74b51c1527 100755
--- a/examples/nemo_gym/launch_nemo_gym_multinode_training.sh
+++ b/examples/nemo_gym/launch_nemo_gym_multinode_training.sh
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 # ----- PARAMETERS -----
-# WANDB_API_KEY, EXP_NAME, NUM_ACTOR_NODES, REPO_LOCATION, CONTAINER_IMAGE_PATH, SLURM_ACCOUNT, SLURM_PARTITION
+# WANDB_API_KEY, HF_TOKEN, EXP_NAME, NUM_ACTOR_NODES, NUM_SLURM_NODES (optional), REPO_LOCATION, CONTAINER_IMAGE_PATH, SLURM_ACCOUNT, SLURM_PARTITION
 
 # ray.sub needs to be launched from the NeMo-RL root directory
 cd $REPO_LOCATION
@@ -23,6 +23,7 @@ read -r -d '' COMMAND <<EOF
 cd ${REPO_LOCATION}
 
 HF_HOME=$PWD/.cache/ \
+HF_TOKEN=$HF_TOKEN \
 WANDB_API_KEY=$WANDB_API_KEY \
 uv run python examples/nemo_gym/run_grpo_nemo_gym.py \
     ++cluster.num_nodes=$NUM_ACTOR_NODES \
@@ -36,11 +37,13 @@ echo -e "Running command:\n$COMMAND"
 
 mount=$(findmnt -n -o TARGET --target .)
 
+FINAL_NUM_SLURM_NODES="${NUM_SLURM_NODES:-$NUM_ACTOR_NODES}"
+
 COMMAND=$COMMAND \
 CONTAINER=$CONTAINER_IMAGE_PATH \
 MOUNTS=$mount:$mount \
 sbatch \
-    --nodes=$NUM_ACTOR_NODES \
+    --nodes=$FINAL_NUM_SLURM_NODES \
     --account=$SLURM_ACCOUNT \
     --partition=$SLURM_PARTITION \
     --time=4:0:0 \
diff --git a/examples/run_distillation_math.py b/examples/run_distillation_math.py
index 51fc4b4283..60cf58909d 100644
--- a/examples/run_distillation_math.py
+++ b/examples/run_distillation_math.py
@@ -14,27 +14,13 @@
 
 import argparse
 import os
-from collections import defaultdict
-from typing import Any, Optional
 
 from omegaconf import OmegaConf
-from transformers import PreTrainedTokenizerBase
 
 from nemo_rl.algorithms.distillation import MasterConfig, distillation_train, setup
 from nemo_rl.algorithms.utils import get_tokenizer
-from nemo_rl.data import DataConfig
-from nemo_rl.data.datasets import AllTaskProcessedDataset, load_response_dataset
-from nemo_rl.data.interfaces import (
-    TaskDataProcessFnCallable,
-    TaskDataSpec,
-)
-from nemo_rl.data.processors import math_hf_data_processor
-from nemo_rl.distributed.ray_actor_environment_registry import (
-    get_actor_python_env,
-)
+from nemo_rl.data.utils import setup_data_with_envs
 from nemo_rl.distributed.virtual_cluster import init_ray
-from nemo_rl.environments.interfaces import EnvironmentInterface
-from nemo_rl.environments.math_environment import MathEnvironment
 from nemo_rl.models.generation import configure_generation_config
 from nemo_rl.utils.config import load_config, parse_hydra_overrides
 from nemo_rl.utils.logger import get_next_experiment_dir
@@ -57,76 +43,6 @@ def parse_args() -> tuple[argparse.Namespace, list[str]]:
     return args, overrides
 
 
-# ===============================================================================
-#                             Math Data Processor
-# ===============================================================================
-TokenizerType = PreTrainedTokenizerBase
-
-
-def setup_data(
-    tokenizer: TokenizerType,
-    data_config: DataConfig,
-    env_configs: dict[str, Any],
-    seed: int,
-) -> tuple[
-    AllTaskProcessedDataset,
-    Optional[AllTaskProcessedDataset],
-    dict[str, EnvironmentInterface],
-    dict[str, EnvironmentInterface],
-]:
-    print("\n▶ Setting up data...")
-    math_task_spec = TaskDataSpec(
-        task_name="math",
-        prompt_file=data_config["prompt_file"],
-        system_prompt_file=data_config["system_prompt_file"],
-    )
-
-    # load dataset
-    data: Any = load_response_dataset(data_config, seed)
-    task_name = (
-        data.task_name if hasattr(data, "task_name") else data.task_spec.task_name
-    )
-    # data processor
-    task_data_processors: dict[str, tuple[TaskDataSpec, TaskDataProcessFnCallable]] = (
-        defaultdict(lambda: (math_task_spec, math_hf_data_processor))
-    )
-    task_data_processors[task_name] = (math_task_spec, math_hf_data_processor)
-
-    # setup math environment
-    math_env = MathEnvironment.options(  # type: ignore # it's wrapped with ray.remote
-        runtime_env={
-            "py_executable": get_actor_python_env(
-                "nemo_rl.environments.math_environment.MathEnvironment"
-            ),
-            "env_vars": dict(os.environ),  # Pass thru all user environment variables
-        }
-    ).remote(env_configs["math"])
-
-    dataset = AllTaskProcessedDataset(
-        data.formatted_ds["train"],
-        tokenizer,
-        math_task_spec,
-        task_data_processors,
-        max_seq_length=data_config["max_input_seq_length"],
-    )
-
-    val_dataset: Optional[AllTaskProcessedDataset] = None
-    if data.formatted_ds["validation"]:
-        val_dataset = AllTaskProcessedDataset(
-            data.formatted_ds["validation"],
-            tokenizer,
-            math_task_spec,
-            task_data_processors,
-            max_seq_length=data_config["max_input_seq_length"],
-        )
-    else:
-        val_dataset = None
-
-    task_to_env: dict[str, EnvironmentInterface] = defaultdict(lambda: math_env)
-    task_to_env[task_name] = math_env
-    return dataset, val_dataset, task_to_env, task_to_env
-
-
 def main() -> None:
     """Main entry point."""
     # Parse arguments
@@ -163,7 +79,7 @@ def main() -> None:
         val_dataset,
         task_to_env,
         val_task_to_env,
-    ) = setup_data(tokenizer, config["data"], config["env"], 42)
+    ) = setup_data_with_envs(tokenizer, config["data"], config["env"])
 
     (
         student_policy,
diff --git a/examples/run_grpo.py b/examples/run_grpo.py
index cd9d47f628..83fd9f1d97 100644
--- a/examples/run_grpo.py
+++ b/examples/run_grpo.py
@@ -15,24 +15,13 @@
 import argparse
 import os
 import pprint
-from collections import defaultdict
-from typing import Any, Optional
 
 from omegaconf import OmegaConf
-from transformers import PreTrainedTokenizerBase
 
 from nemo_rl.algorithms.grpo import MasterConfig, grpo_train, setup
 from nemo_rl.algorithms.utils import get_tokenizer
-from nemo_rl.data import DataConfig
-from nemo_rl.data.datasets import AllTaskProcessedDataset, load_response_dataset
-from nemo_rl.data.interfaces import (
-    TaskDataProcessFnCallable,
-    TaskDataSpec,
-)
-from nemo_rl.data.processors import math_hf_data_processor
+from nemo_rl.data.utils import setup_data_with_envs
 from nemo_rl.distributed.virtual_cluster import init_ray
-from nemo_rl.environments.interfaces import EnvironmentInterface
-from nemo_rl.environments.utils import create_env
 from nemo_rl.models.generation import configure_generation_config
 from nemo_rl.utils.config import load_config, parse_hydra_overrides
 from nemo_rl.utils.logger import get_next_experiment_dir
@@ -53,70 +42,6 @@ def parse_args() -> tuple[argparse.Namespace, list[str]]:
     return args, overrides
 
 
-# ===============================================================================
-#                             Data Processor
-# ===============================================================================
-TokenizerType = PreTrainedTokenizerBase
-
-
-def setup_data(
-    tokenizer: TokenizerType,
-    data_config: DataConfig,
-    env_configs: dict[str, Any],
-    seed: int,
-) -> tuple[
-    AllTaskProcessedDataset,
-    Optional[AllTaskProcessedDataset],
-    dict[str, EnvironmentInterface],
-    dict[str, EnvironmentInterface],
-]:
-    print("\n▶ Setting up envs...")
-    env_name = data_config["env_name"]
-    env = create_env(env_name=env_name, env_configs=env_configs)
-
-    print("\n▶ Setting up data...")
-    default_task_spec = TaskDataSpec(
-        task_name="math_default",
-        prompt_file=data_config["prompt_file"],
-        system_prompt_file=data_config["system_prompt_file"],
-    )
-    # define default task data processor
-    task_data_processors: dict[str, tuple[TaskDataSpec, TaskDataProcessFnCallable]] = (
-        defaultdict(lambda: (default_task_spec, math_hf_data_processor))
-    )
-
-    # load dataset
-    data: Any = load_response_dataset(data_config, seed)
-    task_spec = data.task_spec
-    task_name = data.task_name
-    assert hasattr(data, "processor"), "Dataset must have a processor attribute"
-    task_data_processors[task_name] = (task_spec, data.processor)
-
-    dataset = AllTaskProcessedDataset(
-        data.formatted_ds["train"],
-        tokenizer,
-        default_task_spec,  # default task data spec to process any values not specified in the task-specific specs
-        task_data_processors,
-        max_seq_length=data_config["max_input_seq_length"],
-    )
-
-    val_dataset: Optional[AllTaskProcessedDataset] = None
-    if data.formatted_ds["validation"]:
-        val_dataset = AllTaskProcessedDataset(
-            data.formatted_ds["validation"],
-            tokenizer,
-            default_task_spec,
-            task_data_processors,
-            max_seq_length=data_config["max_input_seq_length"],
-        )
-    else:
-        val_dataset = None
-
-    task_to_env: dict[str, EnvironmentInterface] = defaultdict(lambda: env)
-    task_to_env[task_name] = env
-    return dataset, val_dataset, task_to_env, task_to_env
-
-
 def main() -> None:
     """Main entry point."""
     # Parse arguments
@@ -166,7 +91,7 @@ def main() -> None:
         val_dataset,
         task_to_env,
         val_task_to_env,
-    ) = setup_data(tokenizer, config["data"], config["env"], config["grpo"]["seed"])
+    ) = setup_data_with_envs(tokenizer, config["data"], config["env"])
 
     (
         policy,
diff --git a/examples/run_grpo_math.py b/examples/run_grpo_math.py
index bf790080d9..83fd9f1d97 100644
--- a/examples/run_grpo_math.py
+++ b/examples/run_grpo_math.py
@@ -15,27 +15,13 @@
 import argparse
 import os
 import pprint
-from collections import defaultdict
-from typing import Any, Optional
 
 from omegaconf import OmegaConf
-from transformers import PreTrainedTokenizerBase
 
 from nemo_rl.algorithms.grpo import MasterConfig, grpo_train, setup
 from nemo_rl.algorithms.utils import get_tokenizer
-from nemo_rl.data import DataConfig
-from nemo_rl.data.datasets import AllTaskProcessedDataset, load_response_dataset
-from nemo_rl.data.interfaces import (
-    TaskDataProcessFnCallable,
-    TaskDataSpec,
-)
-from nemo_rl.data.processors import math_hf_data_processor
-from nemo_rl.distributed.ray_actor_environment_registry import (
-    get_actor_python_env,
-)
+from nemo_rl.data.utils import setup_data_with_envs
 from nemo_rl.distributed.virtual_cluster import init_ray
-from nemo_rl.environments.interfaces import EnvironmentInterface
-from nemo_rl.environments.math_environment import MathEnvironment
 from nemo_rl.models.generation import configure_generation_config
 from nemo_rl.utils.config import load_config, parse_hydra_overrides
 from nemo_rl.utils.logger import get_next_experiment_dir
@@ -56,77 +42,6 @@ def parse_args() -> tuple[argparse.Namespace, list[str]]:
     return args, overrides
 
 
-# ===============================================================================
-#                             Math Data Processor
-# ===============================================================================
-TokenizerType = PreTrainedTokenizerBase
-
-
-def setup_data(
-    tokenizer: TokenizerType,
-    data_config: DataConfig,
-    env_configs: dict[str, Any],
-    seed: int,
-) -> tuple[
-    AllTaskProcessedDataset,
-    Optional[AllTaskProcessedDataset],
-    dict[str, EnvironmentInterface],
-    dict[str, EnvironmentInterface],
-]:
-    print("\n▶ Setting up data...")
-    math_task_spec = TaskDataSpec(
-        task_name="math",
-        prompt_file=data_config["prompt_file"],
-        system_prompt_file=data_config["system_prompt_file"],
-    )
-
-    # load dataset
-    data: Any = load_response_dataset(data_config, seed)
-    task_name = (
-        data.task_name if hasattr(data, "task_name") else data.task_spec.task_name
-    )
-
-    # data processor
-    task_data_processors: dict[str, tuple[TaskDataSpec, TaskDataProcessFnCallable]] = (
-        defaultdict(lambda: (math_task_spec, math_hf_data_processor))
-    )
-    task_data_processors[task_name] = (math_task_spec, math_hf_data_processor)
-
-    # setup math environment
-    math_env = MathEnvironment.options(  # type: ignore # it's wrapped with ray.remote
-        runtime_env={
-            "py_executable": get_actor_python_env(
-                "nemo_rl.environments.math_environment.MathEnvironment"
-            ),
-            "env_vars": dict(os.environ),  # Pass thru all user environment variables
-        }
-    ).remote(env_configs["math"])
-
-    dataset = AllTaskProcessedDataset(
-        data.formatted_ds["train"],
-        tokenizer,
-        math_task_spec,
-        task_data_processors,
-        max_seq_length=data_config["max_input_seq_length"],
-    )
-
-    val_dataset: Optional[AllTaskProcessedDataset] = None
-    if data.formatted_ds["validation"]:
-        val_dataset = AllTaskProcessedDataset(
-            data.formatted_ds["validation"],
-            tokenizer,
-            math_task_spec,
-            task_data_processors,
-            max_seq_length=data_config["max_input_seq_length"],
-        )
-    else:
-        val_dataset = None
-
-    task_to_env: dict[str, EnvironmentInterface] = defaultdict(lambda: math_env)
-    task_to_env[task_name] = math_env
-    return dataset, val_dataset, task_to_env, task_to_env
-
-
 def main() -> None:
     """Main entry point."""
     # Parse arguments
@@ -176,7 +91,7 @@ def main() -> None:
         val_dataset,
         task_to_env,
         val_task_to_env,
-    ) = setup_data(tokenizer, config["data"], config["env"], config["grpo"]["seed"])
+    ) = setup_data_with_envs(tokenizer, config["data"], config["env"])
 
     (
         policy,
diff --git a/examples/run_grpo_rm.py b/examples/run_grpo_rm.py
index b36e34bf7e..8688f7b77f 100644
--- a/examples/run_grpo_rm.py
+++ b/examples/run_grpo_rm.py
@@ -15,25 +15,13 @@
 import argparse
 import os
 import pprint
-from collections import defaultdict
-from typing import Any, Optional
 
 from omegaconf import OmegaConf
-from transformers import PreTrainedTokenizerBase
 
 from nemo_rl.algorithms.grpo import MasterConfig, grpo_train, setup
 from nemo_rl.algorithms.utils import get_tokenizer
-from nemo_rl.data import DataConfig
-from nemo_rl.data.datasets import AllTaskProcessedDataset, load_response_dataset
-from nemo_rl.data.interfaces import (
-    TaskDataProcessFnCallable,
-    TaskDataSpec,
-)
-from nemo_rl.data.processors import math_hf_data_processor
-from nemo_rl.distributed.ray_actor_environment_registry import get_actor_python_env
+from nemo_rl.data.utils import setup_data_with_envs
 from nemo_rl.distributed.virtual_cluster import init_ray
-from nemo_rl.environments.interfaces import EnvironmentInterface
-from nemo_rl.environments.reward_model_environment import RewardModelEnvironment
 from nemo_rl.models.generation import configure_generation_config
 from nemo_rl.utils.config import load_config, parse_hydra_overrides
 from nemo_rl.utils.logger import get_next_experiment_dir
@@ -60,75 +48,6 @@ def parse_args() -> tuple[argparse.Namespace, list[str]]:
     return args, overrides
 
 
-# ===============================================================================
-#                             Math Data Processor
-# ===============================================================================
-TokenizerType = PreTrainedTokenizerBase
-
-
-def setup_data(
-    tokenizer: TokenizerType,
-    data_config: DataConfig,
-    env_configs: dict[str, Any],
-    seed: int,
-) -> tuple[
-    AllTaskProcessedDataset,
-    Optional[AllTaskProcessedDataset],
-    dict[str, EnvironmentInterface],
-    dict[str, EnvironmentInterface],
-]:
-    print("\n▶ Setting up data...")
-    # load dataset
-    data: Any = load_response_dataset(data_config, seed)
-    task_name = (
-        data.task_name if hasattr(data, "task_name") else data.task_spec.task_name
-    )
-
-    reward_model_task_spec = TaskDataSpec(
-        task_name=task_name,
-        prompt_file=data_config["prompt_file"],
-        system_prompt_file=data_config["system_prompt_file"],
-    )
-    # data processor
-    task_data_processors: dict[str, tuple[TaskDataSpec, TaskDataProcessFnCallable]] = (
-        defaultdict(lambda: (reward_model_task_spec, math_hf_data_processor))
-    )
-    task_data_processors[task_name] = (reward_model_task_spec, math_hf_data_processor)
-
-    reward_model_env = RewardModelEnvironment.options(  # type: ignore # it's wrapped with ray.remote
-        runtime_env={
-            "py_executable": get_actor_python_env(
-                "nemo_rl.environments.reward_model_environment.RewardModelEnvironment"
-            ),
-            "env_vars": dict(os.environ),  # Pass thru all user environment variables
-        }
-    ).remote(env_configs["reward_model"])
-
-    dataset = AllTaskProcessedDataset(
-        data.formatted_ds["train"],
-        tokenizer,
-        reward_model_task_spec,
-        task_data_processors,
-        max_seq_length=data_config["max_input_seq_length"],
-    )
-
-    val_dataset: Optional[AllTaskProcessedDataset] = None
-    if data.formatted_ds["validation"]:
-        val_dataset = AllTaskProcessedDataset(
-            data.formatted_ds["validation"],
-            tokenizer,
-            reward_model_task_spec,
-            task_data_processors,
-            max_seq_length=data_config["max_input_seq_length"],
-        )
-    else:
-        val_dataset = None
-
-    task_to_env: dict[str, EnvironmentInterface] = defaultdict(lambda: reward_model_env)
-    task_to_env[task_name] = reward_model_env
-    return dataset, val_dataset, task_to_env, task_to_env
-
-
 def main() -> None:
     """Main entry point."""
     # Parse arguments
@@ -178,7 +97,7 @@ def main() -> None:
         val_dataset,
         task_to_env,
         val_task_to_env,
-    ) = setup_data(tokenizer, config["data"], config["env"], config["grpo"]["seed"])
+    ) = setup_data_with_envs(tokenizer, config["data"], config["env"])
 
     (
         policy,
diff --git a/examples/run_sft.py b/examples/run_sft.py
index 8f65262c73..e8a240797f 100644
--- a/examples/run_sft.py
+++ b/examples/run_sft.py
@@ -16,17 +16,19 @@
 import os
 import pprint
 from functools import partial
-from typing import Any, Callable, Optional
 
+from datasets import concatenate_datasets
 from omegaconf import OmegaConf
 from transformers import AutoTokenizer
 
 from nemo_rl.algorithms.sft import MasterConfig, setup, sft_train
 from nemo_rl.algorithms.utils import get_tokenizer
 from nemo_rl.data import DataConfig
-from nemo_rl.data.datasets import AllTaskProcessedDataset, load_response_dataset
-from nemo_rl.data.interfaces import DatumSpec, TaskDataSpec
-from nemo_rl.data.llm_message_utils import get_formatted_message_log
+from nemo_rl.data.datasets import (
+    AllTaskProcessedDataset,
+    load_response_dataset,
+    update_single_dataset_config,
+)
 from nemo_rl.distributed.virtual_cluster import init_ray
 from nemo_rl.utils.config import load_config, parse_hydra_overrides
 from nemo_rl.utils.logger import get_next_experiment_dir
@@ -51,104 +53,78 @@ def parse_args():
 # =======================================================
 # Data Processing
 # =======================================================
-def sft_preprocessor(
-    datum_dict: dict[str, Any],
-    task_data_spec: TaskDataSpec,
-    tokenizer,
-    max_seq_length: int,
-    idx: int,
-    add_bos: bool = True,
-    add_eos: bool = True,
-    add_generation_prompt: bool = False,
-    datum_preprocessor: Optional[Callable] = None,
-) -> DatumSpec:
-    """Process a datum dictionary for SFT training."""
-    # optional preprocessor
-    if datum_preprocessor is not None:
-        datum_dict = datum_preprocessor(datum_dict)
-
-    message_log = get_formatted_message_log(
-        datum_dict["messages"],
-        tokenizer,
-        task_data_spec,
-        add_bos_token=add_bos,
-        add_eos_token=add_eos,
-        add_generation_prompt=add_generation_prompt,
-        tools=datum_dict.get("tools", None),  # Pass tools from data if present
-    )
-
-    length = sum(len(m["token_ids"]) for m in message_log)
-
-    loss_multiplier = 1.0
-    if length > max_seq_length:
-        # make smaller and mask out
-        for message in message_log:
-            message["token_ids"] = message["token_ids"][
-                : min(4, max_seq_length // len(message_log))
-            ]
-        loss_multiplier = 0.0
 
-    output = {
-        "message_log": message_log,
-        "length": length,
-        "extra_env_info": None,
-        "loss_multiplier": loss_multiplier,
-        "idx": idx,
-    }
-    return output
 
+# TODO @yukih: move to nemo_rl/data/utils.py after the data processor is refactored
+def setup_data(tokenizer: AutoTokenizer, data_config: DataConfig):
+    assert "train" in data_config, (
+        "The dataset config structure is updated. Please refer to https://github.com/NVIDIA-NeMo/RL/blob/main/docs/guides/sft.md#datasets "
+        "and the Migrate Guide in https://github.com/NVIDIA-NeMo/RL/pull/1649 to update the dataset config."
+    )
 
-def setup_data(tokenizer: AutoTokenizer, data_config: DataConfig, seed: int):
     print("\n▶ Setting up data...")
-
-    # load dataset
-    data = load_response_dataset(data_config, seed)
-    train_dataset = data.formatted_ds["train"]
-    val_dataset = data.formatted_ds["validation"]
-    sft_task_spec = data.task_spec
-    print(
-        f"  ✓ Training and validation datasets loaded with {len(train_dataset)} and {len(val_dataset) if val_dataset else 0} samples, respectively."
+    # setup train dataset
+    if "default" in data_config:
+        update_single_dataset_config(data_config["train"], data_config["default"])
+    data = load_response_dataset(data_config["train"])
+    data_processor = partial(
+        data.processor,
+        add_bos=data_config["add_bos"],
+        add_eos=data_config["add_eos"],
+        add_generation_prompt=data_config["add_generation_prompt"],
     )
+    task_data_processors = {data.task_name: (data.task_spec, data_processor)}
 
-    # add preprocessor if needed
-    datum_preprocessor = None
-    if "dataset_name" in data_config and data_config["dataset_name"] == "clevr_cogent":
-        from nemo_rl.data.datasets.response_datasets.clevr import (
-            format_clevr_cogent_dataset,
-        )
-
-        datum_preprocessor = partial(format_clevr_cogent_dataset, return_pil=True)
-
-    train_dataset = AllTaskProcessedDataset(
-        train_dataset,
+    dataset = AllTaskProcessedDataset(
+        data.dataset,
         tokenizer,
-        sft_task_spec,
-        partial(
-            sft_preprocessor,
+        None,
+        task_data_processors,
+        max_seq_length=data_config["max_input_seq_length"],
+    )
+    print(f"  ✓ Training dataset loaded with {len(dataset)} samples.")
+
+    # setup validation dataset
+    val_task_data_processors = {}
+    val_data_list = []
+
+    # validation dataset from train dataset (when train dataset's split_validation_size > 0)
+    if hasattr(data, "val_dataset") and data.val_dataset is not None:
+        val_data_list.append(data.val_dataset)
+        val_task_data_processors = task_data_processors.copy()
+
+    # validation dataset from config
+    if "validation" in data_config and data_config["validation"] is not None:
+        if "default" in data_config:
+            update_single_dataset_config(
+                data_config["validation"], data_config["default"]
+            )
+        val_data = load_response_dataset(data_config["validation"])
+        val_data_list.append(val_data.dataset)
+        val_data_processor = partial(
+            val_data.processor,
             add_bos=data_config["add_bos"],
             add_eos=data_config["add_eos"],
             add_generation_prompt=data_config["add_generation_prompt"],
-            datum_preprocessor=datum_preprocessor,
-        ),
-        max_seq_length=data_config["max_input_seq_length"],
-    )
+        )
+        val_task_data_processors[val_data.task_name] = (
+            val_data.task_spec,
+            val_data_processor,
+        )
 
-    if val_dataset is not None:
+    val_dataset = None
+    if len(val_data_list) > 0:
+        merged_val_data = concatenate_datasets(val_data_list)
         val_dataset = AllTaskProcessedDataset(
-            val_dataset,
+            merged_val_data,
             tokenizer,
-            sft_task_spec,
-            partial(
-                sft_preprocessor,
-                add_bos=data_config.get("add_bos", True),
-                add_eos=data_config.get("add_eos", True),
-                add_generation_prompt=data_config["add_generation_prompt"],
-                datum_preprocessor=datum_preprocessor,
-            ),
+            None,
+            val_task_data_processors,
             max_seq_length=data_config["max_input_seq_length"],
         )
+        print(f"  ✓ Validation dataset loaded with {len(val_dataset)} samples.")
 
-    return train_dataset, val_dataset, sft_task_spec
+    return dataset, val_dataset
 
 
 def main(is_vlm: bool = False):
@@ -186,11 +162,7 @@ def main(is_vlm: bool = False):
     tokenizer = get_tokenizer(config["policy"]["tokenizer"], get_processor=is_vlm)
 
     # setup data
-    (
-        dataset,
-        val_dataset,
-        sft_task_spec,
-    ) = setup_data(tokenizer, config["data"], config["sft"]["seed"])
+    dataset, val_dataset = setup_data(tokenizer, config["data"])
 
     (
         policy,
@@ -212,7 +184,6 @@ def main(is_vlm: bool = False):
         loss_fn,
         master_config,
         logger,
-        sft_task_spec,
         checkpointer,
         sft_save_state,
     )
diff --git a/examples/run_vlm_grpo.py b/examples/run_vlm_grpo.py
index 5e8cb1ef0c..05fc878635 100644
--- a/examples/run_vlm_grpo.py
+++ b/examples/run_vlm_grpo.py
@@ -13,42 +13,15 @@
 # limitations under the License.
 
 import argparse
-import base64
 import os
 import pprint
-from collections import defaultdict
-from io import BytesIO
-from typing import Any, Optional
 
-import requests
 from omegaconf import OmegaConf
-from PIL import Image
-from transformers import AutoProcessor
 
 from nemo_rl.algorithms.grpo import MasterConfig, grpo_train, setup
 from nemo_rl.algorithms.utils import get_tokenizer
-from nemo_rl.data import DataConfig
-from nemo_rl.data.datasets import AllTaskProcessedDataset, load_response_dataset
-from nemo_rl.data.datasets.response_datasets.clevr import format_clevr_cogent_dataset
-from nemo_rl.data.datasets.response_datasets.geometry3k import format_geometry3k_dataset
-from nemo_rl.data.datasets.response_datasets.refcoco import format_refcoco_dataset
-from nemo_rl.data.interfaces import (
-    DatumSpec,
-    LLMMessageLogType,
-    TaskDataProcessFnCallable,
-    TaskDataSpec,
-)
-from nemo_rl.data.multimodal_utils import (
-    PackedTensor,
-    get_dim_to_pack_along,
-    get_multimodal_keys_from_processor,
-)
-from nemo_rl.distributed.ray_actor_environment_registry import (
-    get_actor_python_env,
-)
+from nemo_rl.data.utils import setup_data_with_envs
 from nemo_rl.distributed.virtual_cluster import init_ray
-from nemo_rl.environments.interfaces import EnvironmentInterface
-from nemo_rl.environments.vlm_environment import VLMEnvironment
 from nemo_rl.models.generation import configure_generation_config
 from nemo_rl.utils.config import load_config, parse_hydra_overrides
 from nemo_rl.utils.logger import get_next_experiment_dir
@@ -67,238 +40,6 @@ def parse_args() -> tuple[argparse.Namespace, list[str]]:
     return args, overrides
 
 
-# ===============================================================================
-#                             VLM Data Processor
-# ===============================================================================
-
-
-def resolve_to_image(image_path_or_image: str | Image.Image) -> Image.Image:
-    """Resolve the image path to a PIL.Image object.
-
-    image_path can be either:
-    - path to local file
-    - url to image
-    - base64 encoded image
-    """
-    if isinstance(image_path_or_image, Image.Image):
-        return image_path_or_image
-
-    if image_path_or_image.startswith(("http://", "https://")):
-        # Handle URL
-        response = requests.get(image_path_or_image)
-        response.raise_for_status()
-        return Image.open(BytesIO(response.content)).convert("RGB")
-    elif image_path_or_image.startswith("data:"):
-        # Handle base64 encoded image
-        # Format: data:image/jpeg;base64,/9j/4AAQSkZJRg...
-        header, encoded = image_path_or_image.split(",", 1)
-        image_data = base64.b64decode(encoded)
-        return Image.open(BytesIO(image_data)).convert("RGB")
-    else:
-        # Handle local file path
-        return Image.open(image_path_or_image).convert("RGB")
-
-
-def hf_data_processor(
-    datum_dict: dict[str, Any],
-    task_data_spec: TaskDataSpec,
-    processor: AutoProcessor,
-    max_seq_length: int,
-    idx: int,
-) -> DatumSpec:
-    """Process a datum dictionary (directly loaded from response_datasets/<dataset_name>.py) into a DatumSpec for the VLM Environment."""
-    # depending on the task, format the data differently
-    if task_data_spec.task_name == "clevr-cogent":
-        datum_dict = format_clevr_cogent_dataset(datum_dict)
-    elif task_data_spec.task_name == "refcoco":
-        datum_dict = format_refcoco_dataset(datum_dict)
-    elif task_data_spec.task_name == "geometry3k":
-        datum_dict = format_geometry3k_dataset(datum_dict)
-    else:
-        raise ValueError(f"No data processor for task {task_data_spec.task_name}")
-
-    user_message = datum_dict["messages"]
-    problem = user_message[0]["content"]
-    extra_env_info = {"ground_truth": user_message[1]["content"]}
-
-    message_log: LLMMessageLogType = []
-    ### only one round of interaction is assumed, this can easily be extended to a conversational setting
-    user_message = {"role": "user", "content": []}
-    #
-    images = []
-    if isinstance(problem, list):
-        for content in problem:
-            # for image, video, just append it
-            # for text, format the prompt to the problem
-            if content["type"] != "text":
-                user_message["content"].append(content)
-                if content["type"] == "image":
-                    images.append(content["image"])
-                else:
-                    raise ValueError(f"Unsupported content type: {content['type']}")
-            elif content["type"] == "text":
-                user_message["content"].append(
-                    {
-                        "type": "text",
-                        "text": task_data_spec.prompt.format(content["text"])
-                        if task_data_spec.prompt
-                        else content["text"],
-                    }
-                )
-    else:
-        # conversation consists of a text-only message
-        user_message["content"] = task_data_spec.prompt.format(problem)
-
-    images = [resolve_to_image(image) for image in images]
-
-    # get formatted user message
-    if hasattr(processor, "conversation_preprocessor"):
-        user_message_for_chat_template = processor.conversation_preprocessor(
-            user_message
-        )
-    else:
-        user_message_for_chat_template = user_message
-
-    # this is the string-tokenized conversation template for the generation policy (for vllm)
-    string_formatted_dialog = processor.apply_chat_template(
-        [user_message_for_chat_template],
-        tokenize=False,
-        add_generation_prompt=True,
-    )
-
-    # this is the id-tokenized and image processed conversation template for the policy
-    message: dict = processor.apply_chat_template(
-        [user_message],
-        tokenize=True,
-        add_generation_prompt=True,
-        return_tensors="pt",
-        return_dict=True,
-    )
-
-    # add this for backward compatibility
-    user_message["token_ids"] = message["input_ids"][0]
-    # add all keys and values to the user message, and the list of keys
-    multimodal_keys = get_multimodal_keys_from_processor(processor)
-    for key in multimodal_keys:
-        if key in message:
-            user_message[key] = PackedTensor(
-                message[key], dim_to_pack=get_dim_to_pack_along(processor, key)
-            )
-
-    # specifically for gemma, we need to add token_type_ids to the user message as a sequence-type value
-    if "token_type_ids" in message:
-        user_message["token_type_ids"] = message["token_type_ids"][0]
-
-    ### append to user message
-    message_log.append(user_message)
-
-    length = sum(len(m["token_ids"]) for m in message_log)
-    loss_multiplier = 1.0
-    if length >= max_seq_length:
-        # Treat truncated messages as text only
-        vllm_kwargs = {
-            "vllm_content": None,
-            "vllm_images": [],
-        }
-
-        # make smaller and mask out
-        for chat_message in message_log:
-            chat_message["token_ids"] = chat_message["token_ids"][
-                : min(4, max_seq_length // len(message_log))
-            ]
-            for key, value in chat_message.items():
-                if isinstance(value, PackedTensor):
-                    chat_message[key] = PackedTensor.empty_like(value)
-        loss_multiplier = 0.0
-    else:
-        # get the prompt content! (use this for vllm-backend that needs formatted dialog and list of images) for the entire conversation
-        # add images for vllm serving
-        vllm_kwargs = {
-            "vllm_content": string_formatted_dialog,
-            "vllm_images": images,
-        }
-
-    output: DatumSpec = {
-        "message_log": message_log,
-        "length": length,
-        "extra_env_info": extra_env_info,
-        "loss_multiplier": loss_multiplier,
-        "idx": idx,
-        "task_name": task_data_spec.task_name,
-        **vllm_kwargs,
-    }
-    return output
-
-
-def setup_data(
-    processor: AutoProcessor,
-    data_config: DataConfig,
-    env_configs: dict[str, Any],
-    seed: int,
-) -> tuple[
-    AllTaskProcessedDataset,
-    Optional[AllTaskProcessedDataset],
-    dict[str, EnvironmentInterface],
-    dict[str, EnvironmentInterface],
-]:
-    """This function will create a TaskSpec, DatumSpec, and connect the two.
-
-    task_spec contains the task name as well as prompt and system prompt modifiers that can be used by data processor
-    """
-    print("\n▶ Setting up data...")
-
-    # load dataset
-    # TODO @yukih: currently seed is not used for vlm datasets
-    data: Any = load_response_dataset(data_config, seed)
-
-    task_name = data.task_name
-    vlm_task_spec = TaskDataSpec(
-        task_name=task_name,
-        prompt_file=data_config["prompt_file"],
-        system_prompt_file=data_config["system_prompt_file"],
-    )
-
-    # add data processor for different tasks
-    task_data_processors: dict[str, tuple[TaskDataSpec, TaskDataProcessFnCallable]] = (
-        defaultdict(lambda: (vlm_task_spec, hf_data_processor))
-    )
-    task_data_processors[task_name] = (vlm_task_spec, hf_data_processor)
-
-    env_name = data_config["env_name"]
-    vlm_env = VLMEnvironment.options(  # type: ignore # it's wrapped with ray.remote
-        runtime_env={
-            "py_executable": get_actor_python_env(
-                "nemo_rl.environments.vlm_environment.VLMEnvironment"
-            ),
-            "env_vars": dict(os.environ),  # Pass thru all user environment variables
-        }
-    ).remote(env_configs[env_name])
-
-    dataset = AllTaskProcessedDataset(
-        data.formatted_ds["train"],
-        processor,
-        vlm_task_spec,
-        task_data_processors,
-        max_seq_length=data_config["max_input_seq_length"],
-    )
-
-    val_dataset: Optional[AllTaskProcessedDataset] = None
-    if data.formatted_ds["validation"]:
-        val_dataset = AllTaskProcessedDataset(
-            data.formatted_ds["validation"],
-            processor,
-            vlm_task_spec,
-            task_data_processors,
-            max_seq_length=data_config["max_input_seq_length"],
-        )
-    else:
-        val_dataset = None
-
-    task_to_env: dict[str, EnvironmentInterface] = defaultdict(lambda: vlm_env)
-    task_to_env[task_name] = vlm_env
-    return dataset, val_dataset, task_to_env, task_to_env
-
-
 def main() -> None:
     """Main entry point."""
     args, overrides = parse_args()
@@ -356,7 +97,7 @@ def main() -> None:
         val_dataset,
         task_to_env,
         val_task_to_env,
-    ) = setup_data(processor, config["data"], config["env"], config["grpo"]["seed"])
+    ) = setup_data_with_envs(processor, config["data"], config["env"], is_vlm=True)
 
     (
         policy,
diff --git a/nemo_rl/algorithms/grpo.py b/nemo_rl/algorithms/grpo.py
index 8ab62d00fb..3ddcf0dd64 100644
--- a/nemo_rl/algorithms/grpo.py
+++ b/nemo_rl/algorithms/grpo.py
@@ -61,6 +61,7 @@
     run_multi_turn_rollout,
 )
 from nemo_rl.models.generation.interfaces import GenerationInterface
+from nemo_rl.models.generation.sglang import SGLangConfig, SGLangGeneration
 from nemo_rl.models.generation.vllm import VllmConfig, VllmGeneration
 from nemo_rl.models.policy import PolicyConfig
 from nemo_rl.models.policy.interfaces import ColocatablePolicyInterface
@@ -71,6 +72,7 @@
     LoggerConfig,
     print_message_log_samples,
 )
+from nemo_rl.utils.memory_tracker import MemoryTracker
 from nemo_rl.utils.nsys import maybe_gpu_profile_step
 from nemo_rl.utils.timer import TimeoutChecker, Timer
 from nemo_rl.utils.venvs import create_local_venv_on_each_node
@@ -124,6 +126,7 @@ class GRPOConfig(TypedDict):
     val_batch_size: int
     val_at_start: bool
     max_val_samples: int
+    skip_reference_policy_logprobs_calculation: NotRequired[bool]
     seed: int
     async_grpo: NotRequired[AsyncGRPOConfig]
     overlong_filtering: NotRequired[bool]
@@ -138,6 +141,8 @@ class GRPOConfig(TypedDict):
     batch_multiplier: NotRequired[float]
     reward_shaping: RewardShapingConfig
     reward_scaling: RewardScalingConfig
+    # By default advantages are calculated on CPU. Setting this flag to true leverages GPU for their computation.
+    calculate_advantages_on_gpu: NotRequired[bool]
 
 
 class GRPOSaveState(TypedDict):
@@ -482,9 +487,77 @@ def init_vllm():
         pg.finish_generation()
         return pg, time.perf_counter() - t0
 
-    # Handle backend-specific setup
+    def init_sglang():
+        """Create the SGLang generation engine and report how long startup took."""
+        start = time.perf_counter()
+        engine = SGLangGeneration(cluster=inference_cluster, config=generation_config)
+        engine.finish_generation()
+        return engine, time.perf_counter() - start
+
+    def initialize_generation_with_policy(
+        init_generation_fn,
+        generation_name: str,
+        init_time_key: str,
+        colocated_inference: bool,
+        worker_init_timing_metrics: dict,
+    ):
+        """Generic function to initialize a generation engine (vLLM or SGLang) along with policy.
+
+        Args:
+            init_generation_fn: Zero-argument callable returning (generation engine, init seconds), e.g. init_vllm or init_sglang
+            generation_name: Name of the generation engine ("vLLM" or "SGLang"); not referenced in this body
+            init_time_key: Key name for storing initialization time in metrics ("vllm_init_time_s" or "sglang_init_time_s")
+            colocated_inference: Whether inference is colocated with training; True selects sequential init
+            worker_init_timing_metrics: Dictionary to store timing metrics (mutated in place)
+
+        Returns:
+            Tuple of (policy_generation, policy)
+        """
+        # Determine if parallel initialization is possible (non-colocated mode)
+        use_parallel_init = not colocated_inference
+
+        if use_parallel_init:
+            # Parallel initialization: Generation engine and Policy can initialize simultaneously
+            print(
+                "  ⚡ Using parallel worker initialization (non-colocated mode)",
+                flush=True,
+            )
+
+            # Run both initializations on a two-thread pool; init_policy comes from the enclosing scope
+            parallel_start_time = time.perf_counter()
+            with ThreadPoolExecutor(max_workers=2) as executor:
+                generation_future = executor.submit(init_generation_fn)
+                policy_future = executor.submit(init_policy)
+                policy_generation, generation_time = generation_future.result()
+                policy, policy_time = policy_future.result()
+            parallel_wall_time = time.perf_counter() - parallel_start_time
+
+            # Store per-engine timings plus the overall wall time of the parallel section
+            worker_init_timing_metrics[init_time_key] = generation_time
+            worker_init_timing_metrics["policy_init_time_s"] = policy_time
+            worker_init_timing_metrics["parallel_wall_time_s"] = parallel_wall_time
+            worker_init_timing_metrics["parallel_init_enabled"] = True
+
+        else:
+            # Sequential initialization: colocated mode (GPU memory requires generation engine first)
+            print(
+                "  ⚙️  Using sequential worker initialization (colocated mode)",
+                flush=True,
+            )
+
+            # Initialize generation engine first (clean GPU memory), then policy
+            policy_generation, generation_time = init_generation_fn()
+            worker_init_timing_metrics[init_time_key] = generation_time
+
+            policy, policy_time = init_policy()
+            worker_init_timing_metrics["policy_init_time_s"] = policy_time
+            worker_init_timing_metrics["parallel_init_enabled"] = 0.0  # NOTE(review): the parallel branch stores True (bool) under this key
+
+        return policy_generation, policy
+
+    # Handle generation-specific setup
     if backend == "megatron":
-        # Megatron backend: policy_generation is None, only initialize policy
+        # Megatron generation: policy_generation is None, only initialize policy
         policy_generation = None
         print(
             f"  ✓ Using {backend} backend for generation with {policy_config['model_name']}",
@@ -495,7 +568,7 @@ def init_vllm():
         worker_init_timing_metrics["policy_init_time_s"] = policy_time
 
     elif backend == "vllm":
-        # vLLM backend: setup config, then decide parallel vs sequential init
+        # vLLM generation: setup config, then initialize with policy
         generation_config = cast(VllmConfig, generation_config)
         if generation_config["vllm_cfg"]["precision"] == "fp8":
             assert loss_config["use_importance_sampling_correction"] is True, (
@@ -523,48 +596,36 @@ def init_vllm():
             "hf_config_overrides", {}
         )
 
-        # Determine if parallel initialization is possible (non-colocated mode)
-        use_parallel_init = not colocated_inference
-
-        if use_parallel_init:
-            # Parallel initialization: vLLM and Policy can initialize simultaneously
-            print(
-                "  ⚡ Using parallel worker initialization (non-colocated mode)",
-                flush=True,
-            )
+        policy_generation, policy = initialize_generation_with_policy(
+            init_generation_fn=init_vllm,
+            generation_name="vLLM",
+            init_time_key="vllm_init_time_s",
+            colocated_inference=colocated_inference,
+            worker_init_timing_metrics=worker_init_timing_metrics,
+        )
 
-            # Execute both initializations in parallel
-            parallel_start_time = time.perf_counter()
-            with ThreadPoolExecutor(max_workers=2) as executor:
-                vllm_future = executor.submit(init_vllm)
-                policy_future = executor.submit(init_policy)
-                policy_generation, vllm_time = vllm_future.result()
-                policy, policy_time = policy_future.result()
-            parallel_wall_time = time.perf_counter() - parallel_start_time
+        print(
+            f"  ✓ Using vLLM backend for generation with {policy_config['model_name']}",
+            flush=True,
+        )
 
-            # Store timing metrics
-            worker_init_timing_metrics["vllm_init_time_s"] = vllm_time
-            worker_init_timing_metrics["policy_init_time_s"] = policy_time
-            worker_init_timing_metrics["parallel_wall_time_s"] = parallel_wall_time
-            worker_init_timing_metrics["parallel_init_enabled"] = True
+    elif backend == "sglang":
+        generation_config = cast(SGLangConfig, generation_config)
 
-        else:
-            # Sequential initialization: colocated mode (GPU memory requires vLLM first)
-            print(
-                "  ⚙️  Using sequential worker initialization (colocated mode)",
-                flush=True,
-            )
+        # Set model_path if not already set
+        if "model_path" not in generation_config["sglang_cfg"]:
+            generation_config["sglang_cfg"]["model_path"] = policy_config["model_name"]
 
-            # Initialize vLLM first (clean GPU memory), then policy
-            policy_generation, vllm_time = init_vllm()
-            worker_init_timing_metrics["vllm_init_time_s"] = vllm_time
-
-            policy, policy_time = init_policy()
-            worker_init_timing_metrics["policy_init_time_s"] = policy_time
-            worker_init_timing_metrics["parallel_init_enabled"] = 0.0
+        policy_generation, policy = initialize_generation_with_policy(
+            init_generation_fn=init_sglang,
+            generation_name="SGLang",
+            init_time_key="sglang_init_time_s",
+            colocated_inference=colocated_inference,
+            worker_init_timing_metrics=worker_init_timing_metrics,
+        )
 
         print(
-            f"  ✓ Using vLLM backend for generation with {policy_config['model_name']}",
+            f"  ✓ Using SGLang backend for generation with {policy_config['model_name']}",
             flush=True,
         )
 
@@ -901,6 +962,15 @@ def _should_use_nemo_gym(master_config: MasterConfig) -> bool:
     return should_use_nemo_gym
 
 
+def _should_log_nemo_gym_responses(master_config: MasterConfig) -> bool:
+    """Return True when the `env` config opts in to logging NeMo-Gym responses."""
+    env_section = master_config.get("env")
+    if not env_section:
+        # Missing or empty `env` section: logging stays disabled.
+        return False
+    return bool(env_section.get("should_log_nemo_gym_responses"))
+
+
 def refit_policy_generation(
     policy: ColocatablePolicyInterface,
     policy_generation: GenerationInterface,
@@ -945,16 +1015,37 @@ def refit_policy_generation(
                     policy.get_free_memory_bytes() * float(memory_ratio)
                 )
 
-            futures_train = policy.stream_weights_via_ipc_zmq(
-                buffer_size_bytes=buffer_size_bytes, kv_scales=kv_scales
-            )
-            futures_inference = policy_generation.update_weights_via_ipc_zmq()
-            # wait for all futures to complete
-            ray.get(futures_train)
-            results = ray.get(futures_inference)
-            update_success = all(result for result in results if result is not None)
+            if isinstance(policy_generation, SGLangGeneration):
+                sglang_url_to_gpu_uuids = (
+                    policy_generation.get_sglang_url_to_gpu_uuids()
+                )
+                # Stream weights via HTTP
+                flush_success = policy_generation.invalidate_kv_cache()
+                if not flush_success:
+                    print("SGLang KV cache invalidation failed before weight update. ")
+                futures_train = policy.stream_weights_via_http(
+                    sglang_url_to_gpu_uuids=sglang_url_to_gpu_uuids,
+                )
+                # Wait for all workers to complete
+                ray.get(futures_train)
+                update_success = True
+            else:
+                # ZMQ IPC path for vLLM; kv_scales is forwarded exactly as the pre-refactor call did
+                futures_train = policy.stream_weights_via_ipc_zmq(
+                    buffer_size_bytes=buffer_size_bytes, kv_scales=kv_scales
+                )
+                futures_inference = policy_generation.update_weights_via_ipc_zmq()
+                # wait for all futures to complete
+                ray.get(futures_train)
+                results = ray.get(futures_inference)
+                update_success = all(result for result in results if result is not None)
         else:
             # update weights through nccl
+            # SGLang does not support non-colocated inference mode yet.
+            if isinstance(policy_generation, SGLangGeneration):
+                raise NotImplementedError(
+                    "SGLang haven't implemented non-colocated inference mode. "
+                )
             futures_train = policy.broadcast_weights_for_collective(kv_scales=kv_scales)
             futures_inference = policy_generation.update_weights_from_collective()
             # wait for all futures to complete
@@ -977,6 +1068,30 @@ def refit_policy_generation(
         policy_generation.prepare_for_generation(tags=["kv_cache"])
 
 
+def _log_mixed_rewards_and_advantages_information(
+    logger: Logger,
+    total_steps: int,
+    metrics: dict[str, Any],
+    baseline: torch.Tensor,
+    advantages: torch.Tensor,
+) -> None:
+    """Log baseline/advantage histograms and write their summary stats into `metrics`."""
+    # Histogram names carry the "train/" prefix that the remaining metrics are logged with.
+    step = total_steps + 1
+    logger.log_histogram(baseline.numpy(), step, "train/baseline_reward/histogram")
+    pct_zero = 100 * (baseline == 0).float().mean().item()
+    pct_one = 100 * (baseline == 1).float().mean().item()
+    metrics["baseline_reward/pct_0"] = pct_zero
+    metrics["baseline_reward/pct_1"] = pct_one
+    # Whatever is neither exactly 0 nor exactly 1 is counted as "mixed".
+    metrics["baseline_reward/pct_mixed"] = 100 - pct_zero - pct_one
+
+    logger.log_histogram(advantages.numpy(), step, "train/advantages/histogram")
+    advantages_f = advantages.float()
+    metrics["advantages/sum"] = advantages_f.sum().item()
+    metrics["advantages/mean"] = advantages_f.mean().item()
+
+
 # ===============================================================================
 # Training & Validation
 # ===============================================================================
@@ -1004,6 +1119,7 @@ def grpo_train(
         fit_last_save_time=True,
     )
     timeout.start_iterations()
+    memory_tracker = MemoryTracker()
 
     kv_scales_cache = None  # Cache reused for computed kv scales
 
@@ -1015,11 +1131,17 @@ def grpo_train(
     POLICY_GENERATION_STALE = True  # tracks if generation needs a refit before running
     assert policy_generation is not None  # for mypy type check
 
+    if master_config["grpo"].get("skip_reference_policy_logprobs_calculation"):
+        assert master_config["loss_fn"]["reference_policy_kl_penalty"] == 0
+        print(
+            "Reference policy logprob calculation will be skipped since `grpo.skip_reference_policy_logprobs_calculation` is set to True and `loss_fn.reference_policy_kl_penalty` is 0."
+        )
+
     # Check if we need to sync KV cache scales
     # When fallback to policy as the policy_generation, we use getattr to check.
     sync_kv_scales = getattr(policy_generation, "requires_kv_scale_sync", False)
 
-    # common config/state itmes
+    # common config/state items
     current_step = grpo_save_state["current_step"]  # current step within an epoch
     total_steps = grpo_save_state["total_steps"]  # total steps across all epochs
     max_num_steps = master_config["grpo"][
@@ -1043,6 +1165,8 @@ def grpo_train(
     # TODO: Add validation with kv scales if needed
     if val_at_start and current_step == 0:
         print("\n🔍 Running initial validation...", flush=True)
+        memory_tracker.snapshot_start_of_stage("Initial validation", dir())
+
         if NEED_REFIT and POLICY_GENERATION_STALE:
             refit_policy_generation(policy, policy_generation, colocated_inference)
             POLICY_GENERATION_STALE = False
@@ -1055,12 +1179,14 @@ def grpo_train(
             val_task_to_env,
             step=0,
             master_config=master_config,
+            logger=logger,
         )
         policy_generation.finish_generation()
         logger.log_metrics(val_metrics, current_step, prefix="validation")
         logger.log_metrics(validation_timings, current_step, prefix="timing/validation")
 
     while current_epoch < max_num_epochs and total_steps < max_num_steps:
+        memory_tracker.snapshot_start_of_stage("Preparing batch", dir())
         print(f"\n{'=' * 25} Epoch {current_epoch + 1}/{max_num_epochs} {'=' * 25}")
         # batch cache is used for DAPO. We store prompts with non-zero standard deviation in this cache.
         batch_cache: BatchedDataDict[DatumSpec] = None
@@ -1069,6 +1195,10 @@ def grpo_train(
 
         # Run grpo/dapo training loop (single-turn)
         for batch in dataloader:
+            # A central place to store logging data that won't be deleted until the loop ends
+            metrics_logging_data = dict()
+            metrics = dict()
+
             print(
                 f"\n{'=' * 25} Step {current_step + 1}/{min(len(dataloader), max_num_steps)} {'=' * 25}",
                 flush=True,
@@ -1096,6 +1226,7 @@ def grpo_train(
                     input_ids = batched_flat["token_ids"]
 
                 # Generate responses - this updates the LLMMessageLogType in repeated_batch
+                memory_tracker.snapshot_start_of_stage("Generation", dir())
                 print(
                     f"▶ Generating responses for batch of size {repeated_batch.size}...",
                     flush=True,
@@ -1148,11 +1279,9 @@ def grpo_train(
 
                 dynamic_sampling_num_gen_batches += 1
                 with timer.time("generation"):
-                    # Clear vLLM logger metrics for each generation step
-                    if policy_generation is not None and hasattr(
-                        policy_generation, "clear_vllm_logger_metrics"
-                    ):
-                        policy_generation.clear_vllm_logger_metrics()
+                    # Clear logger metrics for each generation step
+                    if policy_generation is not None:
+                        policy_generation.clear_logger_metrics()
                     # Use NeMo-Gym rollouts if enabled. We cascade NeMo-Gym first since NeMo-Gym requires async rollouts.
                     if _should_use_nemo_gym(master_config):
                         generation_config = master_config["policy"]["generation"]
@@ -1169,6 +1298,14 @@ def grpo_train(
                         input_ids = nemo_gym_rollout_result.input_ids
                         repeated_batch = nemo_gym_rollout_result.final_batch
                         rollout_metrics = nemo_gym_rollout_result.rollout_metrics
+                        del nemo_gym_rollout_result
+
+                        # NeMo Gym responses can be very large and expensive to log. Here we have logic to opt-in to logging.
+                        if not _should_log_nemo_gym_responses(master_config):
+                            for key in list(rollout_metrics):
+                                if "full_result" in key:
+                                    rollout_metrics.pop(key)
+
                     # Use async rollouts if vLLM async engine is enabled
                     elif _should_use_async_rollouts(master_config):
                         (
@@ -1202,16 +1339,17 @@ def grpo_train(
                             greedy=False,
                         )
                     policy_generation.finish_generation()
-                    # Collect vLLM logger metrics for performance reporting after each generation step
-                    # inflight batch sizes and num pending samples are collected from each vLLM worker
-                    if policy_generation is not None and hasattr(
-                        policy_generation, "get_vllm_logger_metrics"
-                    ):
-                        vllm_logger_metrics = (
-                            policy_generation.get_vllm_logger_metrics()
+                    # Collect generation logger metrics for performance reporting after each generation step
+                    # inflight batch sizes and num pending samples are collected from each worker
+                    if policy_generation is not None:
+                        generation_logger_metrics = (
+                            policy_generation.get_logger_metrics()
                         )
-                    else:
-                        vllm_logger_metrics = {}
+
+                    metrics_logging_data["mean_gen_tokens_per_sample"] = (
+                        rollout_metrics["mean_gen_tokens_per_sample"]
+                    )
+                    logger.log_metrics(rollout_metrics, total_steps + 1, prefix="train")
 
                 repeated_batch = scale_rewards(
                     repeated_batch, master_config["grpo"]["reward_scaling"]
@@ -1223,20 +1361,37 @@ def grpo_train(
                     )
 
                 # Calculate rewards & advantages
+                memory_tracker.snapshot_start_of_stage("Processing rewards", dir())
                 print("▶ Processing rewards...,", flush=True)
                 with timer.time("reward_calculation"):
                     # Extract rewards from final_batch
                     rewards = repeated_batch["total_reward"]
 
                     print("▶ Computing advantages...", flush=True)
-                    baseline, std = calculate_baseline_and_std_per_prompt(
-                        input_ids,
-                        rewards,
-                        torch.ones_like(rewards),
-                        leave_one_out_baseline=master_config["grpo"][
-                            "use_leave_one_out_baseline"
-                        ],
-                    )
+                    if master_config["grpo"].get("calculate_advantages_on_gpu"):
+                        print("Computing advantages on GPU!")
+                        # Just fix the device id for now
+                        device_id = 0
+                        baseline, std = calculate_baseline_and_std_per_prompt(
+                            input_ids.cuda(device_id),
+                            rewards.cuda(device_id),
+                            torch.ones_like(rewards).cuda(device_id),
+                            leave_one_out_baseline=master_config["grpo"][
+                                "use_leave_one_out_baseline"
+                            ],
+                        )
+                        baseline = baseline.cpu()
+                        std = std.cpu()
+                    else:
+                        baseline, std = calculate_baseline_and_std_per_prompt(
+                            input_ids,
+                            rewards,
+                            torch.ones_like(rewards),
+                            leave_one_out_baseline=master_config["grpo"][
+                                "use_leave_one_out_baseline"
+                            ],
+                        )
+
                     # Apply dynamic sampling to filter prompts with non-zero std (DAPO algorithm)
                     repeated_batch, is_batch_complete, batch_cache, ds_metrics = (
                         dynamic_sampling(
@@ -1273,6 +1428,18 @@ def grpo_train(
                             std=std,
                         )
 
+                    _log_mixed_rewards_and_advantages_information(
+                        logger=logger,
+                        total_steps=total_steps,
+                        metrics=metrics,
+                        baseline=baseline,
+                        advantages=advantages,
+                    )
+
+                    del input_ids
+                    del baseline
+                    del std
+
                 with timer.time("data_processing"):
                     use_overlong_filtering = master_config["grpo"]["overlong_filtering"]
                     if use_overlong_filtering:
@@ -1302,6 +1469,7 @@ def grpo_train(
                             message["advantages"] = advantages[i].expand(
                                 message["token_ids"].shape
                             )
+                    del advantages
 
                     # Convert updated LLMMessageLogType to FlatMessagesType for training
                     flat_messages, input_lengths = batched_message_log_to_flat_message(
@@ -1324,24 +1492,47 @@ def grpo_train(
                         }
                     )
                     # this will be mini-batched inside the policy, so maintain the packed multimodal structure
-                    train_data.update(
-                        flat_messages.get_multimodal_dict(as_tensors=False)
+                    # This is also used to populate part of the downstream logprob calculation data
+                    extra_multimodal_data = flat_messages.get_multimodal_dict(
+                        as_tensors=False
                     )
+                    train_data.update(extra_multimodal_data)
                     train_data.to("cpu")
 
+                    metrics_logging_data["content"] = flat_messages["content"]
+
+                memory_tracker.snapshot_start_of_stage("Computing logprobs", dir())
                 print("▶ Preparing for logprob inference...", flush=True)
                 with timer.time("logprob_inference_prep"):
                     policy.prepare_for_lp_inference()
 
                 print("▶ Computing logprobs...", flush=True)
                 with timer.time("policy_and_reference_logprobs"):
-                    fprop_logprobs = policy.get_logprobs(train_data)["logprobs"]
-                    reference_logprobs = policy.get_reference_policy_logprobs(
-                        train_data
-                    )["reference_logprobs"]
-                    train_data["prev_logprobs"] = fprop_logprobs
-                    train_data["reference_policy_logprobs"] = reference_logprobs
+                    # Build a dedicated logprob_data so we avoid the Ray communication overhead of sending unused data to workers.
+                    logprob_data = BatchedDataDict[ClippedPGLossDataDict](
+                        {
+                            "input_ids": train_data["input_ids"],
+                            "input_lengths": train_data["input_lengths"],
+                            **extra_multimodal_data,
+                        }
+                    )
+                    train_data["prev_logprobs"] = policy.get_logprobs(logprob_data)[
+                        "logprobs"
+                    ]
 
+                    if not master_config["grpo"].get(
+                        "skip_reference_policy_logprobs_calculation"
+                    ):
+                        train_data["reference_policy_logprobs"] = (
+                            policy.get_reference_policy_logprobs(logprob_data)[
+                                "reference_logprobs"
+                            ]
+                        )
+
+                    del logprob_data
+                    del extra_multimodal_data
+
+                memory_tracker.snapshot_start_of_stage("Policy train", dir())
                 print("▶ Preparing for training...", flush=True)
                 with timer.time("training_prep"):
                     policy.prepare_for_training()  # set model train and reload optim to GPU
@@ -1371,6 +1562,7 @@ def grpo_train(
 
                 # Run validation if it's a validation step
                 if val_period > 0 and (total_steps + 1) % val_period == 0:
+                    memory_tracker.snapshot_start_of_stage("Validation", dir())
                     if NEED_REFIT and POLICY_GENERATION_STALE:
                         refit_policy_generation(
                             policy,
@@ -1390,6 +1582,7 @@ def grpo_train(
                         val_task_to_env,
                         step=total_steps + 1,
                         master_config=master_config,
+                        logger=logger,
                     )
                     policy_generation.finish_generation()
                     logger.log_metrics(
@@ -1402,13 +1595,16 @@ def grpo_train(
                 # Get flat advantages and token mask for masked metrics computation
                 flat_advantages = flat_messages["advantages"]
                 flat_token_mask = flat_messages["token_loss_mask"]
+                del flat_messages
 
                 # Filter advantages using token mask (only valid response tokens)
                 response_advantages = torch.masked_select(
                     flat_advantages, flat_token_mask.bool()
                 )
 
+                memory_tracker.snapshot_start_of_stage("Metrics", dir())
                 metrics = {
+                    **metrics,
                     "loss": train_results["loss"].numpy(),
                     "grad_norm": train_results["grad_norm"].numpy(),
                     "reward": rewards.numpy(),
@@ -1456,11 +1652,13 @@ def grpo_train(
                         "mean_prompt_length",
                     }:
                         metrics[k] = np.mean(v).item()
-                    else:
+                    elif isinstance(v, (np.ndarray, list)):
                         metrics[k] = np.sum(v).item()
+                    else:
+                        print(f"Skipping aggregation for {k} ({type(v)})")
 
                 metrics.update(rollout_metrics)
-                metrics["vllm_logger_metrics"] = vllm_logger_metrics
+                metrics["generation_logger_metrics"] = generation_logger_metrics
                 total_valid_tokens += metrics["global_valid_toks"]
 
                 ## Checkpointing
@@ -1476,6 +1674,7 @@ def grpo_train(
                 # Check if timeout-based checkpointing is enabled in config.
                 should_save_by_timeout = timeout.check_save()
 
+                memory_tracker.snapshot_start_of_stage("Checkpointing", dir())
                 if master_config["checkpointing"]["enabled"] and (
                     should_save_by_step or should_save_by_timeout
                 ):
@@ -1549,18 +1748,23 @@ def grpo_train(
 
             # Logging
             # Log training data
-            log_data = {"content": flat_messages["content"]}
-            log_data["rewards"] = rewards.tolist()
-            if master_config["grpo"]["use_dynamic_sampling"]:
-                log_data["filtered_rewards"] = rewards.tolist()
-                log_data["rewards"] = repeated_batch["total_reward"].tolist()
-
-            log_data["generation_logprobs"] = train_data["generation_logprobs"].tolist()
-            log_data["prev_logprobs"] = train_data["prev_logprobs"].tolist()
-            log_data["input_lengths"] = input_lengths.tolist()
-            logger.log_batched_dict_as_jsonl(
-                log_data, f"train_data_step{total_steps + 1}.jsonl"
-            )
+            memory_tracker.snapshot_start_of_stage("Logging", dir())
+            if not _should_log_nemo_gym_responses(master_config):
+                log_data = {"content": metrics_logging_data["content"]}
+                log_data["rewards"] = rewards.tolist()
+                if master_config["grpo"]["use_dynamic_sampling"]:
+                    log_data["filtered_rewards"] = rewards.tolist()
+                    log_data["rewards"] = repeated_batch["total_reward"].tolist()
+
+                log_data["generation_logprobs"] = train_data[
+                    "generation_logprobs"
+                ].tolist()
+                log_data["prev_logprobs"] = train_data["prev_logprobs"].tolist()
+                log_data["input_lengths"] = input_lengths.tolist()
+                logger.log_batched_dict_as_jsonl(
+                    log_data, f"train_data_step{total_steps + 1}.jsonl"
+                )
+                del log_data
 
             timing_metrics: dict[str, float] = timer.get_timing_metrics(
                 reduction_op="sum"
@@ -1583,7 +1787,7 @@ def grpo_train(
                 "enable_vllm_metrics_logger", False
             ) and master_config.get("logger", {}).get("wandb_enabled", False):
                 log_generation_metrics_to_wandb(
-                    vllm_logger_metrics,
+                    generation_logger_metrics,
                     total_steps + 1,
                     master_config["policy"]["generation"]["vllm_cfg"][
                         "vllm_metrics_logger_interval"
@@ -1617,7 +1821,7 @@ def grpo_train(
             else:
                 print(f"  • Avg Reward: {np.mean(rewards.numpy()):.4f}")
             print(
-                f"  • Mean Generation Length: {rollout_metrics['mean_gen_tokens_per_sample']:.4f}",
+                f"  • Mean Generation Length: {metrics_logging_data['mean_gen_tokens_per_sample']:.4f}",
                 flush=True,
             )
 
@@ -1655,19 +1859,39 @@ def grpo_train(
             logger.log_metrics(
                 performance_metrics, total_steps + 1, prefix="performance"
             )
-            logger.log_metrics(timing_metrics, total_steps + 1, prefix="timing/train")
+            # step_finished=True here since this is the final log of our current step.
+            logger.log_metrics(
+                timing_metrics,
+                total_steps + 1,
+                prefix="timing/train",
+                step_finished=True,
+            )
 
             # Reset the batch and set dynamic_sampling_num_gen_batches to 0
             batch_cache = None
             dynamic_sampling_num_gen_batches = 0
 
+            # Clear memory: drop references to this step's intermediate data
+            memory_tracker.snapshot_start_of_stage("After CPU memory clear", dir())
+
+            # processing rewards
+            del repeated_batch
+            del rewards
+            del train_data
+            # logging
+            del metrics
+            if "val_metrics" in dir():
+                del val_metrics
+
             timer.reset()
             current_step += 1
             total_steps += 1
             if should_save_by_timeout:
+                memory_tracker.snapshot_start_of_stage("", dir())
                 print("Timeout has been reached, stopping training early", flush=True)
                 return
             if total_steps >= max_num_steps:
+                memory_tracker.snapshot_start_of_stage("", dir())
                 print(
                     "Max number of steps has been reached, stopping training early",
                     flush=True,
@@ -1685,6 +1909,7 @@ def validate(
     val_task_to_env: Optional[dict[str, EnvironmentInterface]],
     step: int,
     master_config: MasterConfig,
+    logger: Optional[Logger] = None,
 ) -> tuple[dict[str, Any], dict[str, Any]]:
     """Run validation on the validation dataset."""
     if val_dataloader is None:
@@ -1811,6 +2036,14 @@ def validate(
     validation_time = timing_metrics.get("total_validation_time", 0)
     print(f"    • Total validation time: {validation_time:.2f}s", flush=True)
 
+    # Log validation data to JSONL file
+    if logger is not None:
+        val_log_data = {
+            "content": all_message_logs,
+            "rewards": total_rewards,
+        }
+        logger.log_batched_dict_as_jsonl(val_log_data, f"val_data_step{step}.jsonl")
+
     # Make sure to reset the timer after validation
     timer.reset()
 
@@ -2035,6 +2268,7 @@ def async_grpo_train(
                 val_task_to_env,
                 step=0,
                 master_config=master_config,
+                logger=logger,
             )
             policy_generation.finish_generation()
             logger.log_metrics(val_metrics, step, prefix="validation")
@@ -2051,12 +2285,9 @@ def async_grpo_train(
             trajectory_collector.resume.remote()
 
     print("✅ All setup complete, starting buffer wait...")
-
-    # Clear vLLM logger metrics after at start of training
-    if policy_generation is not None and hasattr(
-        policy_generation, "clear_vllm_logger_metrics"
-    ):
-        policy_generation.clear_vllm_logger_metrics()
+    # Clear logger metrics at start of training
+    if policy_generation is not None:
+        policy_generation.clear_logger_metrics()
 
     # Wait for initial buffer fill
     print(
@@ -2296,23 +2527,19 @@ def async_grpo_train(
                     train_results = policy.train(train_data, loss_fn)
 
                 print("🔄 Synchronizing policy weights to trajectory collector…")
-                vllm_logger_metrics = None
+                generation_logger_metrics = None
                 if NEED_REFIT:
                     # Measure pending-generation wait as exposed_generation time
                     print("🔄 Coordinating with trajectory collector before refit...")
                     with timer.time("exposed_generation"):
                         ray.get(trajectory_collector.prepare_for_refit.remote())
 
-                    # Collect vLLM logger metrics for performance reporting
-                    # inflight batch sizes and num pending samples are collected from each vLLM worker
-                    if policy_generation is not None and hasattr(
-                        policy_generation, "get_vllm_logger_metrics"
-                    ):
-                        vllm_logger_metrics = (
-                            policy_generation.get_vllm_logger_metrics()
+                    # Collect generation logger metrics for performance reporting
+                    # inflight batch sizes and num pending samples are collected from each worker
+                    if policy_generation is not None:
+                        generation_logger_metrics = (
+                            policy_generation.get_logger_metrics()
                         )
-                    else:
-                        vllm_logger_metrics = {}
 
                     # Only the actual refit/weight transfer should be counted as weight_sync
                     print("🔄 Performing policy generation refit...")
@@ -2327,11 +2554,9 @@ def async_grpo_train(
                         trajectory_collector.set_weight_version.remote(weight_version)
                         trajectory_collector.resume_after_refit.remote()
 
-                # Clear vLLM logger metrics after each refit (weight sync), starting a new logging cycle
-                if policy_generation is not None and hasattr(
-                    policy_generation, "clear_vllm_logger_metrics"
-                ):
-                    policy_generation.clear_vllm_logger_metrics()
+                # Clear logger metrics after each refit (weight sync), starting a new logging cycle
+                if policy_generation is not None:
+                    policy_generation.clear_logger_metrics()
 
                 # Validation
                 val_metrics, validation_timings = None, None
@@ -2355,6 +2580,7 @@ def async_grpo_train(
                         val_task_to_env,
                         step=step + 1,
                         master_config=master_config,
+                        logger=logger,
                     )
                     policy_generation.finish_generation()
                     logger.log_metrics(
@@ -2424,8 +2650,8 @@ def async_grpo_train(
                     else:
                         metrics[k] = np.sum(v).item()
                 metrics.update(rollout_metrics)
-                if vllm_logger_metrics is not None:
-                    metrics["vllm_logger_metrics"] = vllm_logger_metrics
+                if generation_logger_metrics is not None:
+                    metrics["generation_logger_metrics"] = generation_logger_metrics
                 total_valid_tokens += metrics["global_valid_toks"]
 
                 # Checkpointing (same as sync version)
@@ -2532,7 +2758,7 @@ def async_grpo_train(
                 "enable_vllm_metrics_logger", False
             ) and master_config.get("logger", {}).get("wandb_enabled", False):
                 log_generation_metrics_to_wandb(
-                    vllm_logger_metrics,
+                    generation_logger_metrics,
                     step + 1,
                     master_config["policy"]["generation"]["vllm_cfg"][
                         "vllm_metrics_logger_interval"
diff --git a/nemo_rl/algorithms/loss_functions.py b/nemo_rl/algorithms/loss_functions.py
index 459181c899..21333d1f8d 100755
--- a/nemo_rl/algorithms/loss_functions.py
+++ b/nemo_rl/algorithms/loss_functions.py
@@ -168,7 +168,8 @@ def __call__(
         advantages = data["advantages"][:, 1:]
         prev_logprobs = data["prev_logprobs"][:, 1:]
         generation_logprobs = data["generation_logprobs"][:, 1:]
-        reference_policy_logprobs = data["reference_policy_logprobs"][:, 1:]
+        if self.reference_policy_kl_penalty != 0:
+            reference_policy_logprobs = data["reference_policy_logprobs"][:, 1:]
         seq_index = data.get("seq_index", None)
 
         mask = token_mask * sample_mask.unsqueeze(-1)
diff --git a/nemo_rl/algorithms/sft.py b/nemo_rl/algorithms/sft.py
index 09cbdf93c2..b5787fdb28 100644
--- a/nemo_rl/algorithms/sft.py
+++ b/nemo_rl/algorithms/sft.py
@@ -28,7 +28,6 @@
 from nemo_rl.data import DataConfig
 from nemo_rl.data.collate_fn import rl_collate_fn
 from nemo_rl.data.datasets import AllTaskProcessedDataset
-from nemo_rl.data.interfaces import TaskDataSpec
 from nemo_rl.data.llm_message_utils import (
     add_loss_mask_to_message_log,
     batched_message_log_to_flat_message,
@@ -238,7 +237,6 @@ def validate(
     loss_fn,
     step: int,
     master_config: MasterConfig,
-    sft_task_spec: TaskDataSpec,
     val_batches: int,
     val_batch_size: int,
     val_mbs: int,
@@ -358,7 +356,6 @@ def sft_train(
     loss_fn,
     master_config,
     logger,
-    sft_task_spec,
     checkpointer,
     sft_save_state: SFTSaveState,
 ) -> None:
@@ -400,7 +397,6 @@ def sft_train(
             loss_fn,
             step=0,
             master_config=master_config,
-            sft_task_spec=sft_task_spec,
             val_batches=sft_config["val_batches"],
             val_batch_size=sft_config["val_global_batch_size"],
             val_mbs=sft_config["val_micro_batch_size"],
@@ -474,7 +470,6 @@ def sft_train(
                         loss_fn,
                         step=total_steps + 1,
                         master_config=master_config,
-                        sft_task_spec=sft_task_spec,
                         val_batches=sft_config["val_batches"],
                         val_batch_size=sft_config["val_global_batch_size"],
                         val_mbs=sft_config["val_micro_batch_size"],
diff --git a/nemo_rl/algorithms/utils.py b/nemo_rl/algorithms/utils.py
index 17c69e479a..cc99033aba 100644
--- a/nemo_rl/algorithms/utils.py
+++ b/nemo_rl/algorithms/utils.py
@@ -520,9 +520,10 @@ def visualize_per_worker_timeline(
     ).get("enable_vllm_metrics_logger", False) and master_config["policy"][
         "generation"
     ].get("vllm_cfg", {}).get("async_engine", False)
-    if is_vllm_metrics_logger_enabled:
-        vllm_logger_metrics = metrics["vllm_logger_metrics"]
-        # vllm_logger_me    trics: dict[str (metric_name), dict[int (dp_idx), list[int] (metric_values)]]
+    generation_logger_metrics = metrics.get("generation_logger_metrics", {})
+    if is_vllm_metrics_logger_enabled and generation_logger_metrics:
+        vllm_logger_metrics = generation_logger_metrics
+        # vllm_logger_metrics: dict[str (metric_name), dict[int (dp_idx), list[int] (metric_values)]]
         # metric_name: "inflight_batch_sizes" or "num_pending_samples"
 
         assert "inflight_batch_sizes" in vllm_logger_metrics, (
diff --git a/nemo_rl/data/__init__.py b/nemo_rl/data/__init__.py
index 3e40c9d78c..63322b8fea 100644
--- a/nemo_rl/data/__init__.py
+++ b/nemo_rl/data/__init__.py
@@ -15,32 +15,55 @@
 from typing import Literal, NotRequired, TypedDict
 
 
-# TODO: split this typed dict up so it can be PreferenceDataConfig | ResponseDataConfig | etc
+class ResponseDatasetConfig(TypedDict):
+    dataset_name: NotRequired[str]
+    data_path: NotRequired[str]
+    input_key: NotRequired[str]
+    output_key: NotRequired[str]
+    split: NotRequired[str]
+    prompt_file: NotRequired[str | None]
+    system_prompt_file: NotRequired[str | None]
+    env_name: NotRequired[str]
+    processor: NotRequired[str]  # TODO: remove once processor is refactored
+    download_dir: NotRequired[str]
+    # Size of the validation split carved out of this dataset
+    split_validation_size: NotRequired[float]
+    # Seed for train/validation split when split_validation_size > 0
+    seed: NotRequired[int]
+
+
+# TODO: split this typed dict up so it can be PreferenceDatasetConfig | ResponseDatasetConfig | etc
 #       so that we can type check the configs more rigorously as opposed to saying everything
 #       is not required.
 class DataConfig(TypedDict):
     max_input_seq_length: int
-    prompt_file: NotRequired[str | None]
-    system_prompt_file: NotRequired[str | None]
-    dataset_name: str
-    val_dataset_name: NotRequired[str]
     add_bos: NotRequired[bool]
     add_eos: NotRequired[bool]
-    input_key: NotRequired[str]
-    output_key: NotRequired[str | None]
     add_generation_prompt: NotRequired[bool]
     add_system_prompt: NotRequired[bool]
-    split: NotRequired[str | None]
     shuffle: bool
-    seed: NotRequired[int | None]
-    download_dir: NotRequired[str]
-    train_data_path: NotRequired[str]
-    val_data_paths: NotRequired[dict[str, str]]
     # Number of data loader workers.
     # Set to 8 or 10 for large batches to improve loading speed.
     # This saturates CPU threads without consuming too much memory
     # However, setting it too high might cause memory issues for long seqlens.
     num_workers: NotRequired[int]
+    # dataset configs
+    # TODO: remove NotRequired once preference dataset is refactored
+    train: NotRequired[ResponseDatasetConfig]
+    validation: NotRequired[ResponseDatasetConfig | None]
+    default: NotRequired[ResponseDatasetConfig | None]
+    # TODO: remove once preference dataset is refactored
+    dataset_name: NotRequired[str]
+    val_dataset_name: NotRequired[str]
+    input_key: NotRequired[str]
+    output_key: NotRequired[str | None]
+    split: NotRequired[str]
+    train_data_path: NotRequired[str]
+    val_data_paths: NotRequired[dict[str, str]]
+    prompt_file: NotRequired[str | None]
+    system_prompt_file: NotRequired[str | None]
+    env_name: NotRequired[str]
+    processor: NotRequired[str]  # TODO: remove once processor is refactored
 
 
 # ===============================================================================
diff --git a/nemo_rl/data/datasets/__init__.py b/nemo_rl/data/datasets/__init__.py
index f859705dba..a4747b7114 100644
--- a/nemo_rl/data/datasets/__init__.py
+++ b/nemo_rl/data/datasets/__init__.py
@@ -11,11 +11,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 from nemo_rl.data.datasets.eval_datasets import load_eval_dataset
 from nemo_rl.data.datasets.preference_datasets import load_preference_dataset
 from nemo_rl.data.datasets.processed_dataset import AllTaskProcessedDataset
 from nemo_rl.data.datasets.response_datasets import load_response_dataset
-from nemo_rl.data.datasets.utils import assert_no_double_bos
+from nemo_rl.data.datasets.utils import (
+    assert_no_double_bos,
+    extract_necessary_env_names,
+    update_single_dataset_config,
+)
 
 __all__ = [
     "AllTaskProcessedDataset",
@@ -23,4 +28,6 @@
     "load_preference_dataset",
     "load_response_dataset",
     "assert_no_double_bos",
+    "extract_necessary_env_names",
+    "update_single_dataset_config",
 ]
diff --git a/nemo_rl/data/datasets/processed_dataset.py b/nemo_rl/data/datasets/processed_dataset.py
index 906ab591fc..ea1cbf87d3 100644
--- a/nemo_rl/data/datasets/processed_dataset.py
+++ b/nemo_rl/data/datasets/processed_dataset.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 from typing import Any, Optional, Union
 
 import torch
@@ -55,17 +56,18 @@ def __init__(
     ):
         self.dataset = dataset
         self.tokenizer = tokenizer
+        # TODO: will be removed once preference dataset is refactored
         self.default_task_data_spec = default_task_data_spec
         self.task_data_processors = task_data_processors
         self.max_seq_length = max_seq_length
         self._bos_checked = False
 
-        if isinstance(task_data_processors, dict):
+        if (
+            isinstance(task_data_processors, dict)
+            and default_task_data_spec is not None
+        ):
             # apply defaults to all task data specs
-            for task_name, (
-                task_data_spec,
-                task_data_processor,
-            ) in task_data_processors.items():
+            for _, (task_data_spec, _) in task_data_processors.items():
                 task_data_spec.copy_defaults(self.default_task_data_spec)
 
     def __len__(self) -> int:
diff --git a/nemo_rl/data/datasets/raw_dataset.py b/nemo_rl/data/datasets/raw_dataset.py
index e63217a469..c795480e49 100644
--- a/nemo_rl/data/datasets/raw_dataset.py
+++ b/nemo_rl/data/datasets/raw_dataset.py
@@ -12,18 +12,29 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from datasets import Dataset
+
+from nemo_rl.data import ResponseDatasetConfig
 from nemo_rl.data.interfaces import TaskDataProcessFnCallable, TaskDataSpec
 from nemo_rl.data.processors import PROCESSOR_REGISTRY
 
 
 class RawDataset:
-    def __init__(self, data_config: dict, seed: int = 42):
-        self.data_config: dict = data_config
-        self.seed: int = seed
-        self.task_name: str | None = None
-        self.processor: TaskDataProcessFnCallable | None = None
-        self.task_spec: TaskDataSpec | None = None
-        raise NotImplementedError("__init__ is not implemented")
+    # TODO: change to ResponseDatasetConfig | PreferenceDatasetConfig once preference dataset is refactored
+    data_config: ResponseDatasetConfig
+    dataset: Dataset
+    # `val_dataset` is used only when current dataset is used for both training and validation
+    val_dataset: Dataset | None
+    processor: TaskDataProcessFnCallable
+    task_spec: TaskDataSpec
+
+    def split_train_validation(self, test_size: float, seed: int):
+        if test_size > 0:
+            split_dataset = self.dataset.train_test_split(
+                test_size=test_size, seed=seed
+            )
+            self.dataset = split_dataset["train"]
+            self.val_dataset = split_dataset["test"]
 
     def set_processor(self):
         processor_name = (
@@ -36,7 +47,7 @@ def set_processor(self):
         )
         self.processor = PROCESSOR_REGISTRY[processor_name]
 
-    def set_task_spec(self, data_config: dict):
+    def set_task_spec(self, data_config: ResponseDatasetConfig):
         self.data_config = data_config
         system_prompt_file = self.data_config.get("system_prompt_file", None)
         prompt_file = self.data_config.get("prompt_file", None)
diff --git a/nemo_rl/data/datasets/response_datasets/__init__.py b/nemo_rl/data/datasets/response_datasets/__init__.py
index a259b8a152..b0730c654a 100644
--- a/nemo_rl/data/datasets/response_datasets/__init__.py
+++ b/nemo_rl/data/datasets/response_datasets/__init__.py
@@ -11,10 +11,14 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any
 
+from nemo_rl.data import ResponseDatasetConfig
+from nemo_rl.data.datasets.response_datasets.aime24 import AIME2024Dataset
 from nemo_rl.data.datasets.response_datasets.clevr import CLEVRCoGenTDataset
-from nemo_rl.data.datasets.response_datasets.dapo_math import DAPOMath17KDataset
+from nemo_rl.data.datasets.response_datasets.dapo_math import (
+    DAPOMath17KDataset,
+    DAPOMathAIME2024Dataset,
+)
 from nemo_rl.data.datasets.response_datasets.deepscaler import DeepScalerDataset
 from nemo_rl.data.datasets.response_datasets.geometry3k import Geometry3KDataset
 from nemo_rl.data.datasets.response_datasets.helpsteer3 import HelpSteer3Dataset
@@ -29,101 +33,36 @@
 from nemo_rl.data.datasets.response_datasets.response_dataset import ResponseDataset
 from nemo_rl.data.datasets.response_datasets.squad import SquadDataset
 from nemo_rl.data.datasets.response_datasets.tulu3 import Tulu3SftMixtureDataset
-from nemo_rl.data.datasets.utils import get_extra_kwargs
+
+DATASET_REGISTRY = {
+    # built-in datasets
+    "AIME2024": AIME2024Dataset,
+    "clevr-cogent": CLEVRCoGenTDataset,
+    "DAPOMath17K": DAPOMath17KDataset,
+    "DAPOMathAIME2024": DAPOMathAIME2024Dataset,
+    "DeepScaler": DeepScalerDataset,
+    "geometry3k": Geometry3KDataset,
+    "HelpSteer3": HelpSteer3Dataset,
+    "open_assistant": OasstDataset,
+    "OpenMathInstruct-2": OpenMathInstruct2Dataset,
+    "refcoco": RefCOCODataset,
+    "squad": SquadDataset,
+    "tulu3_sft_mixture": Tulu3SftMixtureDataset,
+    # load from local JSONL file or HuggingFace
+    "openai_format": OpenAIFormatDataset,
+    "ResponseDataset": ResponseDataset,
+}
 
 
-# TODO: refactor this to use the new processor interface and RawDataset interface. https://github.com/NVIDIA-NeMo/RL/issues/1552
-def load_response_dataset(data_config, seed: int = 42):
+def load_response_dataset(data_config: ResponseDatasetConfig):
     """Loads response dataset."""
     dataset_name = data_config["dataset_name"]
 
-    # TODO @yukih: remove duplicated dataset_name (openmathinstruct2, clevr_cogent)
-    # for sft training
-    if dataset_name == "open_assistant":
-        base_dataset = OasstDataset(
-            output_dir="/tmp/open_assistant",
-            seed=seed,
-        )
-    elif dataset_name == "squad":
-        base_dataset = SquadDataset()
-    elif dataset_name == "openmathinstruct2":
-        base_dataset = OpenMathInstruct2Dataset(
-            split=data_config["split"],
-            output_key=data_config["output_key"],
-            prompt_file=data_config["prompt_file"],
-            seed=seed,
-        )
-    elif dataset_name == "clevr_cogent":
-        base_dataset = CLEVRCoGenTDataset(
-            split=data_config["split"],
-            prompt_file=data_config["prompt_file"],
-        )
-    elif dataset_name == "openai_format":
-        base_dataset = OpenAIFormatDataset(
-            data_config["train_data_path"],
-            data_config["val_data_path"],
-            data_config["chat_key"],
-            data_config["system_key"],
-            data_config["system_prompt"],
-            data_config["tool_key"],
-            data_config["use_preserving_dataset"],
-        )
-    # for rl training
-    elif dataset_name == "OpenMathInstruct-2":
-        print("Loading nvidia/OpenMathInstruct2Dataset for training and validation")
-        base_dataset: Any = OpenMathInstruct2Dataset(seed=seed)
-    elif dataset_name == "DeepScaler":
-        print(
-            "Loading agentica-org/DeepScaleR-Preview-Dataset for training and validation"
-        )
-        base_dataset: Any = DeepScalerDataset(seed=seed)
-    elif dataset_name == "DAPOMath17K":
-        print(
-            "Loading BytedTsinghua-SIA/DAPO-Math-17k for training and AIME 2024 for validation"
-        )
-        base_dataset: Any = DAPOMath17KDataset(seed=seed)
-    # for vlm rl training
-    elif dataset_name == "clevr-cogent":
-        base_dataset: Any = CLEVRCoGenTDataset(
-            split=data_config["split"],
-        )
-    elif dataset_name == "refcoco":
-        base_dataset: Any = RefCOCODataset(
-            split=data_config["split"],
-            download_dir=data_config["download_dir"],
-        )
-    elif dataset_name == "geometry3k":
-        base_dataset: Any = Geometry3KDataset(
-            split=data_config["split"],
-        )
-    elif dataset_name == "tulu3_sft_mixture":
-        base_dataset: Any = Tulu3SftMixtureDataset(
-            test_size=data_config.get("test_size", 0.05),
-            prompt_file=data_config.get("prompt_file", None),
-            max_samples=data_config.get("max_samples", None),
-            seed=seed,
-        )
-    elif dataset_name == "HelpSteer3":
-        base_dataset: Any = HelpSteer3Dataset()
-    # fall back to load from JSON file
-    elif dataset_name == "ResponseDataset":
-        if "train_data_path" not in data_config:
-            raise ValueError(
-                "train_data_path is required when dataset_name is not one of the built-ins."
-            )
-        extra_kwargs = get_extra_kwargs(
-            data_config,
-            [
-                "val_data_path",
-                "input_key",
-                "output_key",
-                "train_split",
-                "val_split",
-            ],
-        )
-        base_dataset = ResponseDataset(
-            train_data_path=data_config["train_data_path"],
-            **extra_kwargs,
+    # load dataset
+    if dataset_name in DATASET_REGISTRY:
+        dataset_class = DATASET_REGISTRY[dataset_name]
+        dataset = dataset_class(
+            **data_config  # pyrefly: ignore[missing-argument]  `data_path` is required for some classes
         )
     else:
         raise ValueError(
@@ -132,33 +71,27 @@ def load_response_dataset(data_config, seed: int = 42):
             "or set dataset_name=ResponseDataset to load from local JSONL file or HuggingFace."
         )
 
-    base_dataset.set_task_spec(data_config)
-    # Skip sft datasets, the run_sft.py has not been refactored yet.
-    # TODO: refactor run_sft.py to use the new processor interface. https://github.com/NVIDIA-NeMo/RL/issues/1552
-    if dataset_name not in [
-        "open_assistant",
-        "squad",
-        "openmathinstruct2",
-        "clevr_cogent",
-        "openai_format",
-        "tulu3_sft_mixture",
-    ]:
-        base_dataset.set_processor()
+    dataset.set_task_spec(data_config)
+    # Remove this after the data processor is refactored. https://github.com/NVIDIA-NeMo/RL/issues/1658
+    dataset.set_processor()
 
-    return base_dataset
+    return dataset
 
 
 __all__ = [
+    "AIME2024Dataset",
     "CLEVRCoGenTDataset",
-    "DeepScalerDataset",
     "DAPOMath17KDataset",
+    "DAPOMathAIME2024Dataset",
+    "DeepScalerDataset",
     "Geometry3KDataset",
-    "OpenAIFormatDataset",
+    "HelpSteer3Dataset",
     "OasstDataset",
+    "OpenAIFormatDataset",
     "OpenMathInstruct2Dataset",
     "RefCOCODataset",
     "ResponseDataset",
     "SquadDataset",
     "Tulu3SftMixtureDataset",
-    "HelpSteer3Dataset",
+    "load_response_dataset",
 ]
diff --git a/nemo_rl/data/datasets/response_datasets/aime24.py b/nemo_rl/data/datasets/response_datasets/aime24.py
new file mode 100644
index 0000000000..cb9c7b0395
--- /dev/null
+++ b/nemo_rl/data/datasets/response_datasets/aime24.py
@@ -0,0 +1,51 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any
+
+from datasets import load_dataset
+
+from nemo_rl.data.datasets.raw_dataset import RawDataset
+
+
+class AIME2024Dataset(RawDataset):
+    """Simple wrapper around the AIME2024 dataset with train split.
+
+    Args:
+        repeat: Number of times to repeat the dataset, default is 16
+    """
+
+    def __init__(self, repeat: int = 16, **kwargs) -> None:
+        self.task_name = "AIME2024"
+
+        # load from huggingface
+        self.dataset = load_dataset("HuggingFaceH4/aime_2024", split="train")
+
+        # format the dataset
+        self.dataset = self.dataset.map(
+            self.format_data,
+            remove_columns=self.dataset.column_names,
+        )
+
+        # repeat the dataset
+        self.dataset = self.dataset.repeat(repeat)
+
+    def format_data(self, data: dict[str, Any]) -> dict[str, Any]:
+        return {
+            "messages": [
+                {"role": "user", "content": data["problem"]},
+                {"role": "assistant", "content": data["answer"]},
+            ],
+            "task_name": self.task_name,
+        }
diff --git a/nemo_rl/data/datasets/response_datasets/clevr.py b/nemo_rl/data/datasets/response_datasets/clevr.py
index 30bf67b47f..775b67e8b2 100644
--- a/nemo_rl/data/datasets/response_datasets/clevr.py
+++ b/nemo_rl/data/datasets/response_datasets/clevr.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Optional
+from typing import Any
 
 from datasets import load_dataset
 
@@ -52,68 +52,38 @@ def format_clevr_cogent_dataset(
     ret = {
         "messages": [
             {"role": "user", "content": user_content},
-            {
-                "role": "assistant",
-                "content": assistant_content,
-            },
+            {"role": "assistant", "content": assistant_content},
         ],
-        "task_name": "clevr-cogent",
+        "task_name": example["task_name"],
     }
     return ret
 
 
-# contain different variants of the CLEVR dataset
-def prepare_clevr_cogent_dataset(
-    split: str = "trainA", task_name: Optional[str] = None
-):
-    if task_name is None:
-        task_name = "clevr-cogent"
-
-    if split == "trainA":
-        tr_dataset = load_dataset("MMInstruction/Clevr_CoGenT_TrainA_70K_Complex")[
-            "train"
-        ]
-        val_dataset = load_dataset("MMInstruction/Clevr_CoGenT_ValA")["train"]
-    elif split == "trainB":
-        tr_dataset = load_dataset("MMInstruction/Clevr_CoGenT_TrainA_70K_Complex")[
-            "train"
-        ]
-        val_dataset = load_dataset("MMInstruction/Clevr_CoGenT_ValB")["train"]
-    elif split == "valA":
-        tr_dataset = load_dataset("MMInstruction/Clevr_CoGenT_ValA")["train"]
-        val_dataset = load_dataset("MMInstruction/Clevr_CoGenT_ValA")["train"]
-    elif split == "valB":
-        tr_dataset = load_dataset("MMInstruction/Clevr_CoGenT_ValB")["train"]
-        val_dataset = load_dataset("MMInstruction/Clevr_CoGenT_ValB")["train"]
-
-    # format - disable features to avoid schema conflicts
-    tr_dataset = tr_dataset.add_column("task_name", [task_name] * len(tr_dataset))
-    val_dataset = val_dataset.add_column("task_name", [task_name] * len(val_dataset))
-
-    return {
-        "train": tr_dataset,
-        "validation": val_dataset,
-    }
-
-
 class CLEVRCoGenTDataset(RawDataset):
-    def __init__(
-        self,
-        split: str = "trainA",
-        prompt_file: Optional[str] = None,
-    ):
-        """Simple wrapper around the CLEVR-CoGenT dataset.
-
-        Args:
-            split: The split of the dataset to use.
-            prompt_file: The file containing the prompt for the dataset.
-        """
-        if split not in ["trainA", "trainB", "valA", "valB"]:
+    """Simple wrapper around the CLEVR-CoGenT dataset.
+
+    Args:
+        split: Split name for the dataset, default is "train"
+    """
+
+    def __init__(self, split: str = "train", **kwargs):
+        # train, valA, and valB are supported splits.
+        SPLIT_TO_HF_NAME = {
+            "train": "MMInstruction/Clevr_CoGenT_TrainA_70K_Complex",
+            "valA": "MMInstruction/Clevr_CoGenT_ValA",
+            "valB": "MMInstruction/Clevr_CoGenT_ValB",
+        }
+        if split not in SPLIT_TO_HF_NAME:
             raise ValueError(
-                f"Invalid split: {split}. Please use 'trainA', 'trainB', 'valA', or 'valB'."
+                f"Invalid split: {split}. Please use 'train', 'valA', or 'valB'."
             )
+
         self.task_name = "clevr-cogent"
 
-        self.formatted_ds = prepare_clevr_cogent_dataset(
-            split=split, task_name=self.task_name
+        # this dataset will process the image during training using `format_clevr_cogent_dataset`
+        self.dataset = load_dataset(SPLIT_TO_HF_NAME[split])["train"]
+
+        # tag every row with the task name; `format_clevr_cogent_dataset` reads it from each example
+        self.dataset = self.dataset.add_column(
+            "task_name", [self.task_name] * len(self.dataset)
         )
diff --git a/nemo_rl/data/datasets/response_datasets/dapo_math.py b/nemo_rl/data/datasets/response_datasets/dapo_math.py
index 3a9988923b..096c6fe835 100644
--- a/nemo_rl/data/datasets/response_datasets/dapo_math.py
+++ b/nemo_rl/data/datasets/response_datasets/dapo_math.py
@@ -12,72 +12,54 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 from typing import Any
 
-from datasets import Dataset, load_dataset
+from datasets import load_dataset
 
 from nemo_rl.data.datasets.raw_dataset import RawDataset
 
 
-def format_dapo_math_17k(
-    data: dict[str, str | float | int],
-    task_name: str = "DAPOMath17K",
-) -> dict[str, list[Any] | str]:
-    return {
-        "messages": [
-            {
-                "role": "user",
-                "content": data["prompt"][0]["content"],
-            },
-            {
-                "role": "assistant",
-                "content": data["reward_model"]["ground_truth"],
-            },
-        ],
-        "task_name": task_name,
-    }
-
+class DAPOMath17KDataset(RawDataset):
+    """Simple wrapper around the DAPO Math 17K dataset with train split."""
 
-def prepare_dapo_math_17k_dataset(
-    seed: int = 42, task_name: str = "DAPOMath17K"
-) -> dict[str, Dataset | None]:
-    """Load and split the DeepScaler dataset into train and test sets."""
-    # Load the original dataset for training
-    train_ds = load_dataset("BytedTsinghua-SIA/DAPO-Math-17k", split="train")
+    def __init__(self, **kwargs) -> None:
+        self.task_name = "DAPOMath17K"
 
-    # Load hendrydong/aime24 dataset for validation
-    val_ds = load_dataset("BytedTsinghua-SIA/AIME-2024", split="train")
+        # load from huggingface
+        self.dataset = load_dataset("BytedTsinghua-SIA/DAPO-Math-17k", split="train")
 
-    # Shuffle the training dataset with the specified seed
-    train_ds = train_ds.shuffle(seed=seed)
+        # format the dataset
+        self.dataset = self.dataset.map(
+            self.format_data,
+            remove_columns=self.dataset.column_names,
+        )
 
-    # Format the examples, removing original columns
-    train_formatted = train_ds.map(
-        format_dapo_math_17k,
-        remove_columns=train_ds.column_names,
-        fn_kwargs={"task_name": task_name},
-    )
-    val_formatted = val_ds.map(
-        format_dapo_math_17k,
-        remove_columns=val_ds.column_names,
-        fn_kwargs={"task_name": task_name},
-    )
+    def format_data(self, data: dict[str, Any]) -> dict[str, Any]:
+        return {
+            "messages": [
+                {
+                    "role": "user",
+                    "content": data["prompt"][0]["content"],
+                },
+                {
+                    "role": "assistant",
+                    "content": data["reward_model"]["ground_truth"],
+                },
+            ],
+            "task_name": self.task_name,
+        }
 
-    return {
-        "train": train_formatted,
-        "validation": val_formatted,
-    }
 
+class DAPOMathAIME2024Dataset(DAPOMath17KDataset):
+    def __init__(self, **kwargs) -> None:
+        """Initialize the DAPO Math AIME 2024 dataset with train split."""
+        self.task_name = "DAPOMathAIME2024"
 
-class DAPOMath17KDataset(RawDataset):
-    def __init__(self, seed: int = 42) -> None:
-        """Initialize the DAPO Math 17K dataset with train split.
+        # load from huggingface
+        self.dataset = load_dataset("BytedTsinghua-SIA/AIME-2024", split="train")
 
-        Args:
-            seed: Random seed for reproducible splitting
-        """
-        self.task_name = "DAPOMath17K"
-        self.formatted_ds = prepare_dapo_math_17k_dataset(
-            seed=seed, task_name=self.task_name
+        # format the dataset
+        self.dataset = self.dataset.map(
+            self.format_data,
+            remove_columns=self.dataset.column_names,
         )
diff --git a/nemo_rl/data/datasets/response_datasets/deepscaler.py b/nemo_rl/data/datasets/response_datasets/deepscaler.py
index 3465491225..7f6189281d 100644
--- a/nemo_rl/data/datasets/response_datasets/deepscaler.py
+++ b/nemo_rl/data/datasets/response_datasets/deepscaler.py
@@ -12,77 +12,35 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 from typing import Any
 
-from datasets import Dataset, load_dataset
+from datasets import load_dataset
 
 from nemo_rl.data.datasets.raw_dataset import RawDataset
 
 
-def format_math(
-    data: dict[str, str | float | int], task_name: str = "DeepScaler"
-) -> dict[str, list[Any] | str]:
-    return {
-        "messages": [
-            {
-                "role": "user",
-                "content": data["problem"],
-            },
-            {
-                "role": "assistant",
-                "content": data["answer"],
-            },
-        ],
-        "task_name": task_name,
-    }
-
-
-def prepare_deepscaler_dataset(
-    seed: int = 42, task_name: str = "DeepScaler"
-) -> dict[str, Dataset | None]:
-    """Load and split the DeepScaler dataset into train and test sets."""
-    # Load the original dataset for training
-    train_ds = load_dataset("agentica-org/DeepScaleR-Preview-Dataset", split="train")
-
-    # Load hendrydong/aime24 dataset for validation
-    val_ds = load_dataset("HuggingFaceH4/aime_2024", split="train")
-
-    # Shuffle the training dataset with the specified seed
-    train_ds = train_ds.shuffle(seed=seed)
-
-    # Format the examples, removing original columns
-    train_formatted = train_ds.map(
-        format_math,
-        remove_columns=train_ds.column_names,
-        fn_kwargs={"task_name": task_name},
-    )
-    val_formatted = val_ds.map(
-        format_math,
-        remove_columns=val_ds.column_names,
-        fn_kwargs={"task_name": task_name},
-    )
-
-    # Compute accuracy 16 times per sample (matching the DeepScaleR evaluation setting)
-    val_repeated = []
-    for _ in range(16):
-        val_repeated.extend(val_formatted)
-    val_formatted = val_formatted.from_list(val_repeated)
-
-    return {
-        "train": train_formatted,
-        "validation": val_formatted,
-    }
-
-
 class DeepScalerDataset(RawDataset):
-    def __init__(self, seed: int = 42) -> None:
-        """Initialize the DeepScaler dataset with train/test split.
+    """Simple wrapper around the DeepScaler dataset with train split."""
 
-        Args:
-            seed: Random seed for reproducible splitting
-        """
+    def __init__(self, **kwargs) -> None:
         self.task_name = "DeepScaler"
-        self.formatted_ds = prepare_deepscaler_dataset(
-            seed=seed, task_name=self.task_name
+
+        # load from huggingface
+        self.dataset = load_dataset(
+            "agentica-org/DeepScaleR-Preview-Dataset", split="train"
         )
+
+        # format the dataset
+        self.dataset = self.dataset.map(
+            self.format_data,
+            remove_columns=self.dataset.column_names,
+        )
+
+    def format_data(self, data: dict[str, Any]) -> dict[str, Any]:
+        return {
+            "messages": [
+                {"role": "user", "content": data["problem"]},
+                {"role": "assistant", "content": data["answer"]},
+            ],
+            "task_name": self.task_name,
+        }
diff --git a/nemo_rl/data/datasets/response_datasets/geometry3k.py b/nemo_rl/data/datasets/response_datasets/geometry3k.py
index d45fb15127..429decb522 100644
--- a/nemo_rl/data/datasets/response_datasets/geometry3k.py
+++ b/nemo_rl/data/datasets/response_datasets/geometry3k.py
@@ -11,7 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, Optional
+
+from typing import Any
 
 from datasets import load_dataset
 
@@ -24,11 +25,8 @@ def format_geometry3k_dataset(
 ) -> dict[str, Any]:
     """Format the Geometry3K dataset into an OpenAI-API-like message log."""
     # isolate single image
-    example["image"] = (
-        example["images"][0]
-        if isinstance(example["images"], list)
-        else example["images"]
-    )
+    if isinstance(example["images"], list):
+        example["image"] = example["images"][0]
 
     user_content = [
         {
@@ -48,50 +46,32 @@ def format_geometry3k_dataset(
     ret = {
         "messages": [
             {"role": "user", "content": user_content},
-            {
-                "role": "assistant",
-                "content": assistant_content,
-            },
+            {"role": "assistant", "content": assistant_content},
         ],
-        "task_name": "geometry3k",
+        "task_name": example["task_name"],
     }
     return ret
 
 
-def prepare_geometry3k_dataset(split: str = "train", task_name: str = "geometry3k"):
-    if split == "train":
-        tr_dataset = load_dataset("hiyouga/geometry3k")["train"]
-        val_dataset = load_dataset("hiyouga/geometry3k")["validation"]
-    else:
-        tr_dataset = load_dataset("hiyouga/geometry3k")[split]
-        val_dataset = load_dataset("hiyouga/geometry3k")[split]
-
-    # format - disable features to avoid schema conflicts
-    tr_dataset = tr_dataset.add_column("task_name", [task_name] * len(tr_dataset))
-    val_dataset = val_dataset.add_column("task_name", [task_name] * len(val_dataset))
-    return {
-        "train": tr_dataset,
-        "validation": val_dataset,
-    }
-
-
 class Geometry3KDataset(RawDataset):
-    def __init__(
-        self,
-        split: str = "train",
-        prompt_file: Optional[str] = None,
-    ):
-        """Simple wrapper around the Geometry3K dataset.
+    """Simple wrapper around the Geometry3K dataset.
+
+    Args:
+        split: Split name for the dataset, default is "train"
+    """
 
-        Args:
-            split: The split of the dataset to use.
-            prompt_file: The file containing the prompt for the dataset.
-        """
+    def __init__(self, split: str = "train", **kwargs):
+        # train, validation, and test are supported splits.
         assert split in ["train", "validation", "test"], (
             f"Invalid split: {split}. Please use 'train' or 'validation' or 'test'."
         )
+
         self.task_name = "geometry3k"
 
-        self.formatted_ds = prepare_geometry3k_dataset(
-            split=split, task_name=self.task_name
+        # this dataset will process the image during training using `format_geometry3k_dataset`
+        self.dataset = load_dataset("hiyouga/geometry3k")[split]
+
+        # add a "task_name" column so `format_geometry3k_dataset` can read it from each example
+        self.dataset = self.dataset.add_column(
+            "task_name", [self.task_name] * len(self.dataset)
         )
diff --git a/nemo_rl/data/datasets/response_datasets/helpsteer3.py b/nemo_rl/data/datasets/response_datasets/helpsteer3.py
index 7d275634ef..af7e00be05 100644
--- a/nemo_rl/data/datasets/response_datasets/helpsteer3.py
+++ b/nemo_rl/data/datasets/response_datasets/helpsteer3.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 from typing import Any
 
 from absl import logging
@@ -19,44 +20,49 @@
 from nemo_rl.data.datasets.raw_dataset import RawDataset
 
 
-# Choose the chosen response as the response and the rejected response as the target
-def to_response_data_format(
-    data: dict[str, Any], task_name: str = "HelpSteer3"
-) -> dict:
-    response_1 = data["response1"]
-    response_2 = data["response2"]
-    overall_preference = data["overall_preference"]
-
-    if overall_preference < 0:
-        chosen = response_1
-    elif overall_preference == 0:
-        logging.log_every_n(
-            logging.WARNING,
-            "Preference is 0 for some examples! Setting chosen and rejected to response 1 since we don't know which response is better",
-            1000,
-        )
-        chosen = response_1
-    else:
-        chosen = response_2
-
-    if isinstance(data["context"], str):
-        context = [{"role": "user", "content": data["context"]}]
-    else:
-        context = data["context"]
+class HelpSteer3Dataset(RawDataset):
+    """Simple wrapper around the HelpSteer3 dataset with preference subset.
 
-    return {
-        "context": context,
-        "response": [{"role": "assistant", "content": chosen}],
-        "task_name": task_name,
-    }
+    Args:
+        split: Split name for the dataset, default is "train"
+    """
 
+    def __init__(self, split: str = "train", **kwargs):
+        self.task_name = "HelpSteer3"
 
-class HelpSteer3Dataset(RawDataset):
-    """HelpSteer3 preference dataset for DPO training."""
+        # load from huggingface
+        self.dataset = load_dataset("nvidia/HelpSteer3", "preference")[split]
 
-    def __init__(self) -> None:
-        ds = load_dataset("nvidia/HelpSteer3", "preference")
-        self.task_name = "HelpSteer3"
-        self.formatted_ds = ds.map(
-            to_response_data_format, fn_kwargs={"task_name": self.task_name}
+        # format the dataset
+        self.dataset = self.dataset.map(
+            self.format_data,
+            remove_columns=self.dataset.column_names,
         )
+
+    def format_data(self, data: dict[str, Any]) -> dict[str, Any]:
+        response_1 = data["response1"]
+        response_2 = data["response2"]
+        overall_preference = data["overall_preference"]
+
+        if overall_preference < 0:
+            chosen = response_1
+        elif overall_preference == 0:
+            logging.log_every_n(
+                logging.WARNING,
+                "Preference is 0 for some examples! Setting chosen and rejected to response 1 since we don't know which response is better",
+                1000,
+            )
+            chosen = response_1
+        else:
+            chosen = response_2
+
+        if isinstance(data["context"], str):
+            context = [{"role": "user", "content": data["context"]}]
+        else:
+            context = data["context"]
+
+        return {
+            "context": context,
+            "response": [{"role": "assistant", "content": chosen}],
+            "task_name": self.task_name,
+        }
diff --git a/nemo_rl/data/datasets/response_datasets/oai_format_dataset.py b/nemo_rl/data/datasets/response_datasets/oai_format_dataset.py
index 2dfb44aada..674940e88e 100644
--- a/nemo_rl/data/datasets/response_datasets/oai_format_dataset.py
+++ b/nemo_rl/data/datasets/response_datasets/oai_format_dataset.py
@@ -97,8 +97,7 @@ class OpenAIFormatDataset(RawDataset):
     }
 
     Args:
-        train_ds_path: Path to the training dataset JSON file
-        val_ds_path: Path to the validation dataset JSON file
+        data_path: Path to the dataset JSON file
         chat_key: Key for the messages list in the dataset (default: "messages")
         system_key: Optional key for system prompt in the dataset
         system_prompt: Optional system prompt to add if not in the dataset
@@ -121,36 +120,33 @@ class OpenAIFormatDataset(RawDataset):
 
     def __init__(
         self,
-        train_ds_path: str,
-        val_ds_path: str,
+        data_path: str,
         chat_key: str = "messages",
         system_key: str | None = None,
         system_prompt: str | None = None,
         tool_key: str | None = "tools",
         use_preserving_dataset: bool = False,
+        **kwargs,
     ):
         self.chat_key = chat_key
         self.system_key = system_key
         self.system_prompt = system_prompt
         self.tool_key = tool_key
-        self.task_name = "json_dataset"
+        self.task_name = data_path.split("/")[-1].split(".")[0]
+
         if not use_preserving_dataset:
             # Use the standard HuggingFace approach (faster and more standard)
-            train_original_dataset = load_dataset("json", data_files=train_ds_path)[
-                "train"
-            ]
-            val_original_dataset = load_dataset("json", data_files=val_ds_path)["train"]
-
-            formatted_train_dataset = train_original_dataset.map(self.add_messages_key)
-            formatted_val_dataset = val_original_dataset.map(self.add_messages_key)
+            original_dataset = load_dataset("json", data_files=data_path)["train"]
+            # Format the dataset
+            self.dataset = original_dataset.map(self.format_data)
 
             print(
-                f"Loaded dataset using standard approach (train: {len(formatted_train_dataset)}, val: {len(formatted_val_dataset)})"
+                f"Loaded dataset using standard approach: {len(self.dataset)} samples."
             )
 
             # Warn if tools are present in the dataset
             if self.tool_key and any(
-                self.tool_key in sample for sample in formatted_train_dataset
+                self.tool_key in sample for sample in self.dataset
             ):
                 warnings.warn(
                     "Tools detected in dataset. Set use_preserving_dataset=True to preserve heterogeneous tool schemas. "
@@ -173,46 +169,28 @@ def __init__(
             )
 
             # Load JSON files directly
-            with open(train_ds_path, "r") as f:
-                train_data = [json.loads(line) for line in f]
-
-            with open(val_ds_path, "r") as f:
-                val_data = [json.loads(line) for line in f]
-
-            # Apply transformations
-            formatted_train_data = [self.add_messages_key(item) for item in train_data]
-            formatted_val_data = [self.add_messages_key(item) for item in val_data]
-
+            with open(data_path, "r") as f:
+                original_dataset = [json.loads(line) for line in f]
+            # Format the dataset
+            formatted_data = [self.format_data(item) for item in original_dataset]
             # Use PreservingDataset to maintain exact structure
-            formatted_train_dataset = PreservingDataset(formatted_train_data)
-            formatted_val_dataset = PreservingDataset(formatted_val_data)
+            self.dataset = PreservingDataset(formatted_data)
 
             print(
-                f"Loaded dataset using PreservingDataset (train: {len(formatted_train_dataset)}, val: {len(formatted_val_dataset)})"
+                f"Loaded dataset using PreservingDataset: {len(self.dataset)} samples."
             )
 
-        self.formatted_ds = {
-            "train": formatted_train_dataset,
-            "validation": formatted_val_dataset,
-        }
-        self.task_name = "json_dataset"
-
-    def add_messages_key(
-        self,
-        example: dict[str, Any],
-    ) -> dict[str, list[dict[str, Any]]]:
-        messages = [message for message in example[self.chat_key]]
-        if self.system_key is not None and self.system_key in example:
-            messages = [
-                {"role": "system", "content": example[self.system_key]}
-            ] + messages
+    def format_data(self, data: dict[str, Any]) -> dict[str, Any]:
+        messages = [message for message in data[self.chat_key]]
+        if self.system_key is not None and self.system_key in data:
+            messages = [{"role": "system", "content": data[self.system_key]}] + messages
         elif self.system_prompt:
             messages = [{"role": "system", "content": self.system_prompt}] + messages
         assert messages[-1]["role"] == "assistant"
 
         # Preserve tools if they exist in the data
-        result = {"messages": messages}
-        if self.tool_key and self.tool_key in example:
-            result["tools"] = example[self.tool_key]
+        result = {"messages": messages, "task_name": self.task_name}
+        if self.tool_key and self.tool_key in data:
+            result["tools"] = data[self.tool_key]
 
         return result
diff --git a/nemo_rl/data/datasets/response_datasets/oasst.py b/nemo_rl/data/datasets/response_datasets/oasst.py
index 327bc52b8f..e76316e77e 100644
--- a/nemo_rl/data/datasets/response_datasets/oasst.py
+++ b/nemo_rl/data/datasets/response_datasets/oasst.py
@@ -15,10 +15,9 @@
 import copy
 import gzip
 import json
-import os
-import random
 
-import requests
+from datasets import Dataset
+from huggingface_hub import hf_hub_download
 
 from nemo_rl.data.datasets.raw_dataset import RawDataset
 
@@ -67,7 +66,7 @@ def parse_conversations(tree_obj, first: bool = False):
     return all_conversations
 
 
-def get_data_records(objs, task_name: str = "OASST"):
+def get_data_records(objs, task_name: str = "oasst"):
     ## TODO: old format was multi-conversation per example, but ours is single conversation
     ## is this just because of the input data format?
     output = []
@@ -87,46 +86,31 @@ def get_data_records(objs, task_name: str = "OASST"):
     return output
 
 
-def download_and_process_oasst(
-    output_directory: str = ".",
-    seed: int = 42,
-    task_name: str = "OASST",
-    split_ratio: float = 0.95,
-) -> dict[str, list]:
-    os.makedirs(output_directory, exist_ok=True)
-    filename = f"{output_directory}/2023-04-12_oasst_all.trees.jsonl.gz"
-
-    # only download if doesn't exist
-    if not os.path.isfile(filename):
-        url = "https://huggingface.co/datasets/OpenAssistant/oasst1/resolve/main/2023-04-12_oasst_all.trees.jsonl.gz"
-        response = requests.get(url)
-        with open(filename, mode="wb") as fw:
-            fw.write(response.content)
-
-    with gzip.open(filename) as f:
-        file_content = f.readlines()
-
-    all_objs = [json.loads(dp.decode("utf-8")) for dp in file_content]
+class OasstDataset(RawDataset):
+    """Simple wrapper around the OASST dataset.
 
-    random.seed(seed)
-    random.shuffle(all_objs)
-    train_num = int(len(all_objs) * split_ratio)
-    train_objs = all_objs[:train_num]
-    val_objs = all_objs[train_num:]
-    train_records = get_data_records(train_objs, task_name=task_name)
-    val_records = get_data_records(val_objs, task_name=task_name)
+    Args:
+        split_validation_size: Fraction of the data held out for validation, default is 0.05
+        seed: Seed for train/validation split when split_validation_size > 0, default is 42
+    """
 
-    formatted_ds = {
-        "train": train_records,
-        "validation": val_records,
-    }
+    def __init__(self, split_validation_size: float = 0.05, seed: int = 42, **kwargs):
+        self.task_name = "oasst"
 
-    return formatted_ds
+        # load from huggingface
+        filename = hf_hub_download(
+            repo_id="OpenAssistant/oasst1",
+            filename="2023-04-12_oasst_all.trees.jsonl.gz",
+            repo_type="dataset",
+        )
+        with gzip.open(filename) as f:
+            file_content = f.readlines()
 
+        # format the dataset
+        all_objs = [json.loads(dp.decode("utf-8")) for dp in file_content]
+        self.dataset = get_data_records(all_objs, task_name=self.task_name)
+        self.dataset = Dataset.from_list(self.dataset)
 
-class OasstDataset(RawDataset):
-    def __init__(self, output_dir: str = ".", seed: int = 42) -> None:
-        self.task_name = "OASST"
-        self.formatted_ds = download_and_process_oasst(
-            output_dir, seed, task_name=self.task_name
-        )
+        # `self.val_dataset` is used (not None) only when current dataset is used for both training and validation
+        self.val_dataset = None
+        self.split_train_validation(split_validation_size, seed)
diff --git a/nemo_rl/data/datasets/response_datasets/openmathinstruct2.py b/nemo_rl/data/datasets/response_datasets/openmathinstruct2.py
index f2bb228427..1b2c651997 100644
--- a/nemo_rl/data/datasets/response_datasets/openmathinstruct2.py
+++ b/nemo_rl/data/datasets/response_datasets/openmathinstruct2.py
@@ -12,96 +12,59 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Any
 
-from typing import Any, Optional
-
-from datasets import Dataset, load_dataset
+from datasets import load_dataset
 
 from nemo_rl.data.datasets.raw_dataset import RawDataset
 
 
-def format_math(
-    data: dict[str, str | float | int],
-    output_key: str = "expected_answer",
-    task_name: str = "OpenMathInstruct-2",
-) -> dict[str, list[Any] | str]:
-    return {
-        "messages": [
-            {
-                "role": "user",
-                "content": data["problem"],
-            },
-            {
-                "role": "assistant",
-                "content": data[output_key],
-            },
-        ],
-        "task_name": task_name,
-    }
-
-
-def prepare_openinstructmath2_dataset(
-    split: str = "train_1M",
-    seed: int = 42,
-    test_size: float = 0.05,
-    output_key: str = "expected_answer",
-    task_name: str = "OpenMathInstruct-2",
-) -> dict[str, Dataset | None]:
-    """Load and split the OpenMathInstruct-2 dataset into train and validation sets using HF's train_test_split."""
-    print(
-        "WARNING: For reproducible experiments, preprocess the dataset once and define your own HfDataset subclass that directly uses the preprocessed datasets."
-    )
-
-    # Load the original dataset
-    original_ds = load_dataset("nvidia/OpenMathInstruct-2", split=split)
-
-    # Split into train and validation sets using HF's train_test_split
-    split_ds = original_ds.train_test_split(test_size=test_size, seed=seed)
-
-    # Format the examples, removing original columns
-    train_formatted = split_ds["train"].map(
-        format_math,
-        remove_columns=split_ds["train"].column_names,
-        fn_kwargs={"output_key": output_key, "task_name": task_name},
-    )
-    val_formatted = split_ds["test"].map(
-        format_math,
-        remove_columns=split_ds["test"].column_names,
-        fn_kwargs={"output_key": output_key, "task_name": task_name},
-    )
-
-    return {
-        "train": train_formatted,
-        "validation": val_formatted,
-    }
+class OpenMathInstruct2Dataset(RawDataset):
+    """Simple wrapper around the OpenMathInstruct2 dataset.
 
+    Args:
+        output_key: Key for the output text, default is "expected_answer"
+        split: Split name for the dataset, default is "train_1M"
+        split_validation_size: Fraction of the data held out for validation, default is 0.05
+        seed: Seed for train/validation split when split_validation_size > 0, default is 42
+    """
 
-class OpenMathInstruct2Dataset(RawDataset):
     def __init__(
         self,
+        output_key: str = "expected_answer",
         split: str = "train_1M",
+        split_validation_size: float = 0.05,
         seed: int = 42,
-        test_size: float = 0.05,
-        output_key: str = "expected_answer",
-        prompt_file: Optional[str] = None,
+        **kwargs,
     ):
-        """Initialize the OpenMathInstruct2 dataset with train/validation split.
-
-        Args:
-            seed: Random seed for reproducible splitting
-            test_size: Proportion of data to use for validation (0.0-1.0)
-        """
         # train, train_1M, train_2M, and train_5M are supported splits.
         if split not in ["train", "train_1M", "train_2M", "train_5M"]:
             raise ValueError(
                 f"Invalid split: {split}. Please use 'train', 'train_1M', 'train_2M', or 'train_5M'."
             )
 
+        self.input_key = "problem"
+        self.output_key = output_key
         self.task_name = "OpenMathInstruct-2"
-        self.formatted_ds = prepare_openinstructmath2_dataset(
-            split=split,
-            seed=seed,
-            test_size=test_size,
-            output_key=output_key,
-            task_name=self.task_name,
+
+        # load from huggingface
+        self.dataset = load_dataset("nvidia/OpenMathInstruct-2", split=split)
+
+        # format the dataset
+        self.dataset = self.dataset.map(
+            self.format_data,
+            remove_columns=self.dataset.column_names,
         )
+
+        # `self.val_dataset` is used (not None) only when current dataset is used for both training and validation
+        self.val_dataset = None
+        self.split_train_validation(split_validation_size, seed)
+
+    def format_data(self, data: dict[str, Any]) -> dict[str, Any]:
+        return {
+            "messages": [
+                {"role": "user", "content": data[self.input_key]},
+                {"role": "assistant", "content": data[self.output_key]},
+            ],
+            "task_name": self.task_name,
+        }
diff --git a/nemo_rl/data/datasets/response_datasets/refcoco.py b/nemo_rl/data/datasets/response_datasets/refcoco.py
index 9f32b1a12d..a8630e2c6b 100644
--- a/nemo_rl/data/datasets/response_datasets/refcoco.py
+++ b/nemo_rl/data/datasets/response_datasets/refcoco.py
@@ -15,8 +15,7 @@
 import os
 import random
 import zipfile
-from pathlib import Path
-from typing import Any, Optional, Union
+from typing import Any
 
 import requests
 from datasets import load_dataset
@@ -98,7 +97,6 @@ def format_refcoco_dataset(
     width: int = 256,
     height: int = 256,
     caption_type: str = "random",
-    prompt_file: Optional[str] = None,
 ) -> dict[str, Any]:
     """Format the RefCOCO dataset from huggingface.
 
@@ -158,101 +156,56 @@ def format_refcoco_dataset(
     ret = {
         "messages": [
             {"role": "user", "content": user_content},
-            {
-                "role": "assistant",
-                "content": solution,
-            },
+            {"role": "assistant", "content": solution},
         ],
-        "task_name": "refcoco",
+        "task_name": example["task_name"],
     }
     return ret
 
 
-# contain different variants of the CLEVR dataset
-def prepare_refcoco_dataset(
-    split: str = "default",
-    task_name: Optional[str] = None,
-    path_to_coco_images: Optional[Union[str, Path]] = None,
-):
-    if task_name is None:
-        task_name = "refcoco"
-
-    tr_dataset = load_dataset("jxu124/refcoco")["train"]
-    val_dataset = load_dataset("jxu124/refcoco")["validation"]
-
-    # format - disable features to avoid schema conflicts
-    tr_dataset = tr_dataset.add_column("task_name", [task_name] * len(tr_dataset))
-    val_dataset = val_dataset.add_column("task_name", [task_name] * len(val_dataset))
-
-    if path_to_coco_images is None:
-        print("No path to coco images provided, downloading images to ./coco_images")
-        path_to_coco_images = Path("./coco_images")
-        os.makedirs(path_to_coco_images, exist_ok=True)
-    else:
-        path_to_coco_images = Path(path_to_coco_images)
-
-    # check for images
-    if not os.path.exists(str(path_to_coco_images / "train2014")):
-        print(f"Downloading train2014 images to {path_to_coco_images}")
-        download_and_unzip(
-            "http://images.cocodataset.org/zips/train2014.zip", str(path_to_coco_images)
-        )
-    if not os.path.exists(str(path_to_coco_images / "val2014")):
-        print(f"Downloading val2014 images to {path_to_coco_images}")
-        download_and_unzip(
-            "http://images.cocodataset.org/zips/val2014.zip", str(path_to_coco_images)
-        )
-
-    # add image column
-    tr_dataset = tr_dataset.map(
-        lambda example: {
-            **example,
-            "image_path": str(example["image_path"]).replace(
-                "coco/", str(path_to_coco_images) + "/"
-            )
-            if "image_path" in example
-            else example.get("image_path"),
-        }
-    )
-    val_dataset = val_dataset.map(
-        lambda example: {
-            **example,
-            "image_path": str(example["image_path"]).replace(
-                "coco/", str(path_to_coco_images) + "/"
-            )
-            if "image_path" in example
-            else example.get("image_path"),
-        }
-    )
-
-    return {
-        "train": tr_dataset,
-        "validation": val_dataset,
-    }
+class RefCOCODataset(RawDataset):
+    """Simple wrapper around the RefCOCO dataset.
 
+    Args:
+        split: Split name for the dataset, default is "train"
+        download_dir: Directory where the COCO images are downloaded and looked up, default is "./coco_images"
+    """
 
-class RefCOCODataset(RawDataset):
     def __init__(
         self,
-        split: str = "default",
-        prompt_file: Optional[str] = None,
-        download_dir: Optional[str] = None,
+        split: str = "train",
+        download_dir: str = "./coco_images",
+        **kwargs,
     ):
-        """Simple wrapper around the RefCOCO dataset.
-
-        Args:
-            split: The split of the dataset to use (currently only 'default' is supported)
-            prompt_file: The file containing the prompt for the dataset.
-        """
-        VALID_SPLITS = ["default"]
-        if split not in VALID_SPLITS:
+        # train and validation are supported splits.
+        SPLIT_TO_IMAGE_URL = {
+            "train": "http://images.cocodataset.org/zips/train2014.zip",
+            "validation": "http://images.cocodataset.org/zips/val2014.zip",
+        }
+        if split not in SPLIT_TO_IMAGE_URL:
             raise ValueError(
-                f"Invalid split: {split}. Please use one of {VALID_SPLITS}."
+                f"Invalid split: {split}. Please use 'train' or 'validation'."
             )
+
+        self.download_dir = download_dir
         self.task_name = "refcoco"
 
-        self.formatted_ds = prepare_refcoco_dataset(
-            split=split,
-            task_name=self.task_name,
-            path_to_coco_images=download_dir,
-        )
+        # check for images
+        filename = SPLIT_TO_IMAGE_URL[split].split("/")[-1].split(".")[0]
+        if not os.path.exists(f"{download_dir}/{filename}"):
+            print(f"Downloading {filename} images to {download_dir}")
+            download_and_unzip(SPLIT_TO_IMAGE_URL[split], download_dir)
+
+        # this dataset will process the image during training using `format_refcoco_dataset`
+        self.dataset = load_dataset("jxu124/refcoco")[split]
+        self.dataset = self.dataset.map(self.format_data)
+
+    def format_data(self, data: dict[str, Any]) -> dict[str, Any]:
+        image_path = None
+        if "image_path" in data:
+            image_path = data["image_path"].replace("coco/", self.download_dir + "/")
+
+        return {
+            "image_path": image_path,
+            "task_name": self.task_name,
+        }
diff --git a/nemo_rl/data/datasets/response_datasets/response_dataset.py b/nemo_rl/data/datasets/response_datasets/response_dataset.py
index 15af21206e..3fa6acfa7a 100644
--- a/nemo_rl/data/datasets/response_datasets/response_dataset.py
+++ b/nemo_rl/data/datasets/response_datasets/response_dataset.py
@@ -29,56 +29,51 @@ class ResponseDataset(RawDataset):
     }
 
     Args:
-        train_data_path: Path to the JSON file containing training data
-        val_data_path: Path to the JSON file containing validation data
-        input_key: Key for the input text
-        output_key: Key for the output text
-        train_split: Split name for the training data, used for HuggingFace datasets, default is None
-        val_split: Split name for the validation data, used for HuggingFace datasets, default is None
+        data_path: Path to the dataset JSON file
+        input_key: Key for the input text, default is "input"
+        output_key: Key for the output text, default is "output"
+        split: Optional split name for the dataset, used for HuggingFace datasets
+        split_validation_size: Size of the validation data, default is 0
+        seed: Seed for train/validation split when split_validation_size > 0, default is 42
     """
 
     def __init__(
         self,
-        train_data_path: str,
-        val_data_path: Optional[str] = None,
+        data_path: str,
         input_key: str = "input",
         output_key: str = "output",
-        train_split: Optional[str] = None,
-        val_split: Optional[str] = None,
+        split: Optional[str] = None,
+        split_validation_size: float = 0,
+        seed: int = 42,
+        **kwargs,
     ):
         self.input_key = input_key
         self.output_key = output_key
-        self.task_name = "ResponseDataset"
-        # load from json file or huggingface
-        train_ds = load_dataset_from_path(train_data_path, train_split)
-        if val_data_path:
-            val_ds = load_dataset_from_path(val_data_path, val_split)
-        else:
-            val_ds = None
+        self.task_name = data_path.split("/")[-1].split(".")[0]
+
+        # load from local or huggingface
+        self.dataset = load_dataset_from_path(data_path, split)
 
-        # Only apply add_messages_key if 'messages' column doesn't exist
-        if "messages" not in train_ds.column_names:
-            train_ds = train_ds.map(
-                self.add_messages_key, fn_kwargs={"task_name": self.task_name}
+        # format the dataset
+        if "messages" not in self.dataset.column_names:
+            self.dataset = self.dataset.map(
+                self.format_data,
+                remove_columns=self.dataset.column_names,
             )
-        if val_ds is not None and "messages" not in val_ds.column_names:
-            val_ds = val_ds.map(
-                self.add_messages_key, fn_kwargs={"task_name": self.task_name}
+        else:
+            self.dataset = self.dataset.add_column(
+                "task_name", [self.task_name] * len(self.dataset)
             )
 
-        # store the formatted dataset
-        self.formatted_ds = {
-            "train": train_ds,
-            "validation": val_ds,
-        }
+        # `self.val_dataset` is used (not None) only when current dataset is used for both training and validation
+        self.val_dataset = None
+        self.split_train_validation(split_validation_size, seed)
 
-    def add_messages_key(
-        self, example: dict[str, Any], task_name: str = "ResponseDataset"
-    ) -> dict[str, str | list[dict[str, Any]]]:
+    def format_data(self, data: dict[str, Any]) -> dict[str, Any]:
         return {
             "messages": [
-                {"role": "user", "content": example[self.input_key]},
-                {"role": "assistant", "content": example[self.output_key]},
+                {"role": "user", "content": data[self.input_key]},
+                {"role": "assistant", "content": data[self.output_key]},
             ],
-            "task_name": task_name,
+            "task_name": self.task_name,
         }
diff --git a/nemo_rl/data/datasets/response_datasets/squad.py b/nemo_rl/data/datasets/response_datasets/squad.py
index c4e1023424..dba0f7c243 100644
--- a/nemo_rl/data/datasets/response_datasets/squad.py
+++ b/nemo_rl/data/datasets/response_datasets/squad.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 from typing import Any
 
 from datasets import load_dataset
@@ -20,27 +19,40 @@
 from nemo_rl.data.datasets.raw_dataset import RawDataset
 
 
-def format_squad(data: dict[str, Any]) -> dict[str, list[dict[str, str]]]:
-    return {
-        "messages": [
-            {
-                "role": "system",
-                "content": data["context"],
-            },
-            {
-                "role": "user",
-                "content": data["question"],
-            },
-            {
-                "role": "assistant",
-                "content": data["answers"]["text"][0],
-            },
-        ]
-    }
-
-
 class SquadDataset(RawDataset):
-    def __init__(self) -> None:
-        original_ds = load_dataset("rajpurkar/squad")
-        self.task_name = "SQuAD"
-        self.formatted_ds = original_ds.map(format_squad)
+    """Simple wrapper around the squad dataset.
+
+    Args:
+        split: Split name for the dataset, default is "train"
+    """
+
+    def __init__(self, split: str = "train", **kwargs) -> None:
+        # extra keyword arguments are accepted but not used here
+        self.task_name = "squad"
+
+        # fetch the requested split from huggingface
+        raw_split = load_dataset("rajpurkar/squad")[split]
+
+        # replace every original column with the formatted fields
+        self.dataset = raw_split.map(
+            self.format_data, remove_columns=raw_split.column_names
+        )
+
+    def format_data(self, data: dict[str, Any]) -> dict[str, Any]:
+        """Convert one SQuAD record into a system/user/assistant chat sample.
+
+        The context becomes the system turn, the question the user turn, and
+        the first entry of `answers["text"]` the assistant turn.
+        """
+        context_turn = {"role": "system", "content": data["context"]}
+        question_turn = {"role": "user", "content": data["question"]}
+        # only the first listed answer text is used as the assistant target
+        answer_turn = {
+            "role": "assistant",
+            "content": data["answers"]["text"][0],
+        }
+
+        return {
+            "messages": [context_turn, question_turn, answer_turn],
+            "task_name": self.task_name,
+        }
diff --git a/nemo_rl/data/datasets/response_datasets/tulu3.py b/nemo_rl/data/datasets/response_datasets/tulu3.py
index 9dc29dd83f..1e27d25a2f 100644
--- a/nemo_rl/data/datasets/response_datasets/tulu3.py
+++ b/nemo_rl/data/datasets/response_datasets/tulu3.py
@@ -19,74 +19,54 @@
 from nemo_rl.data.datasets.raw_dataset import RawDataset
 
 
-def format_tulu3_sft_mixture(
-    data: dict[str, Any], task_name: str = "tulu3_sft_mixture"
-) -> dict[str, str | dict[str, str]]:
-    """Format for Tulu3 SFT data."""
-    messages = data["messages"]
-
-    # Ensure last message is from assistant
-    if not messages or messages[-1]["role"] != "assistant":
-        raise ValueError(f"Expected last message to be from assistant, got: {messages}")
-
-    return {
-        "messages": messages,
-        "task_name": task_name,
-    }
-
-
 class Tulu3SftMixtureDataset(RawDataset):
-    """Tulu3 SFT mixture dataset."""
+    """Simple wrapper around the Tulu3 SFT mixture dataset with train split.
+
+    Args:
+        split_validation_size: Size of the validation data, default is 0.05
+        seed: Seed for train/validation split when split_validation_size > 0, default is 42
+        max_samples: Optional maximum number of samples to use from the dataset
+    """
 
     def __init__(
         self,
+        split_validation_size: float = 0.05,
         seed: int = 42,
-        test_size: float = 0.05,
-        prompt_file: str | None = None,
         max_samples: int | None = None,
+        **kwargs,
     ) -> None:
-        """Initialize the Tulu3 SFT mixture dataset.
-
-        Args:
-            seed: Random seed for train/validation split
-            test_size: Proportion of data to use for validation (0.0-1.0)
-            prompt_file: Optional prompt file path to be applied via TaskDataSpec
-            max_samples: Optional maximum number of samples to use from the dataset
-        """
         print(
             "WARNING: For reproducible experiments, preprocess the dataset once and define your own HfDataset subclass that directly uses the preprocessed datasets."
         )
 
         self.task_name = "tulu3_sft_mixture"
 
-        # Load the original dataset
-        original_ds = load_dataset(
-            path="allenai/tulu-3-sft-mixture",
-            trust_remote_code=True,
-        )["train"]  # This dataset only has a train split
+        # load from huggingface
+        self.dataset = load_dataset("allenai/tulu-3-sft-mixture")["train"]
 
         # Optionally limit the number of samples
         if max_samples is not None and max_samples > 0:
-            original_ds = original_ds.shuffle(seed=seed).select(
-                range(min(max_samples, len(original_ds)))
+            self.dataset = self.dataset.shuffle(seed=seed).select(
+                range(min(max_samples, len(self.dataset)))
             )
 
-        # Split into train and validation sets
-        split_ds = original_ds.train_test_split(test_size=test_size, seed=seed)
-
-        # Format the examples without any reasoning processing
-        train_formatted = split_ds["train"].map(
-            format_tulu3_sft_mixture,
-            remove_columns=split_ds["train"].column_names,
-            fn_kwargs={"task_name": self.task_name},
-        )
-        val_formatted = split_ds["test"].map(
-            format_tulu3_sft_mixture,
-            remove_columns=split_ds["test"].column_names,
-            fn_kwargs={"task_name": self.task_name},
+        # format the dataset
+        self.dataset = self.dataset.map(
+            self.format_data,
+            remove_columns=["id", "source"],
         )
 
-        self.formatted_ds = {
-            "train": train_formatted,
-            "validation": val_formatted,
-        }
+        # `self.val_dataset` is used (not None) only when current dataset is used for both training and validation
+        self.val_dataset = None
+        self.split_train_validation(split_validation_size, seed)
+
+    def format_data(self, data: dict[str, Any]) -> dict[str, Any]:
+        """Validate the final assistant turn and return the task name field."""
+        turns = data["messages"]
+
+        # an empty conversation or a non-assistant final turn is rejected
+        ends_with_assistant = bool(turns) and turns[-1]["role"] == "assistant"
+        if not ends_with_assistant:
+            detail = f"Expected last message to be from assistant, got: {turns}"
+            raise ValueError(detail)
+        return {"task_name": self.task_name}
diff --git a/nemo_rl/data/datasets/utils.py b/nemo_rl/data/datasets/utils.py
index eb78becc45..151c79d47d 100644
--- a/nemo_rl/data/datasets/utils.py
+++ b/nemo_rl/data/datasets/utils.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import base64
 import io
 import os
@@ -106,3 +107,34 @@ def get_extra_kwargs(data_config: dict, keys: list[str]) -> dict:
         if key in data_config:
             extra_kwargs[key] = data_config[key]
     return extra_kwargs
+
+
+def update_single_dataset_config(data_config: dict, default_data_config: dict) -> None:
+    """Copy each default entry into `data_config` unless that key is already set."""
+    missing = [key for key in default_data_config if key not in data_config]
+    for key in missing:
+        data_config[key] = default_data_config[key]
+
+
+def extract_necessary_env_names(data_config: dict) -> list[str]:
+    """Collect the environment names that the data config refers to.
+
+    `env_configs` may define environments that the data config never uses, so only
+    the `env_name` entries of the "train", "validation" and "default" sections
+    are gathered.
+
+    Args:
+        data_config: The data config.
+
+    Returns:
+        The distinct environment names.
+    """
+    # a set drops duplicates when several sections name the same environment
+    env_names = {
+        data_config[section]["env_name"]
+        for section in ("train", "validation", "default")
+        if section in data_config
+        and data_config[section] is not None
+        and "env_name" in data_config[section]
+    }
+    return list(env_names)
diff --git a/nemo_rl/data/interfaces.py b/nemo_rl/data/interfaces.py
index 05f10236c5..207b702bda 100644
--- a/nemo_rl/data/interfaces.py
+++ b/nemo_rl/data/interfaces.py
@@ -18,8 +18,11 @@
 import torch
 from transformers.tokenization_utils_base import PreTrainedTokenizerBase
 
+from nemo_rl.data.multimodal_utils import PackedTensor
+
 # OpenAI-API-like message log, but every messsage may contain associated tensors (i.e. tokenized strings and logprobs) in addition to the original "content" string
 LLMMessageLogType = list[dict[str, Union[str, torch.Tensor]]]
+VLMMessageLogType = list[dict[str, Union[str, torch.Tensor, PackedTensor]]]
 
 # Flattened message log where all tensors and data are concatenated together for a conversation
 # Converts a conversation from list-of-turns format to key-value format with concatenated tensors
@@ -30,9 +33,9 @@
 
 
 class DatumSpec(TypedDict):
-    message_log: LLMMessageLogType
+    message_log: LLMMessageLogType | VLMMessageLogType
     length: int  # total (concatenated) length of the message tensors
-    extra_env_info: dict[str, Any]
+    extra_env_info: Optional[dict[str, Any]]
     loss_multiplier: float  # multiplier for the loss for this datum. 0 to mask out (say the sample is invalid)
     idx: int
     task_name: NotRequired[str]
diff --git a/nemo_rl/data/multimodal_utils.py b/nemo_rl/data/multimodal_utils.py
index 0da507acc7..918c589ad1 100644
--- a/nemo_rl/data/multimodal_utils.py
+++ b/nemo_rl/data/multimodal_utils.py
@@ -12,9 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import base64
+from io import BytesIO
 from typing import Optional, Union
 
+import requests
 import torch
+from PIL import Image
 from transformers import PreTrainedTokenizerBase
 
 
@@ -179,3 +183,30 @@ def get_dim_to_pack_along(processor, key: str) -> int:
         return 1
     # return zero by default
     return 0
+
+
+def resolve_to_image(image_path_or_image: str | Image.Image) -> Image.Image:
+    """Resolve an image reference to a PIL.Image object.
+
+    A PIL image is returned as-is. A string may be an http(s) URL, a `data:` URI
+    with a base64 payload, or a local file path; these are opened as RGB images.
+    URL downloads are bounded by a timeout and raise `requests` errors on failure.
+    """
+    if isinstance(image_path_or_image, Image.Image):
+        return image_path_or_image
+
+    if image_path_or_image.startswith(("http://", "https://")):
+        # Handle URL; without a timeout a stalled server would block forever
+        response = requests.get(image_path_or_image, timeout=30)
+        response.raise_for_status()
+        return Image.open(BytesIO(response.content)).convert("RGB")
+    elif image_path_or_image.startswith("data:"):
+        # Handle base64 encoded image
+        # Format: data:image/jpeg;base64,/9j/4AAQSkZJRg...
+        _, encoded = image_path_or_image.split(",", 1)
+        image_data = base64.b64decode(encoded)
+        return Image.open(BytesIO(image_data)).convert("RGB")
+    else:
+        # Handle local file path; close the file once the RGB copy exists
+        with Image.open(image_path_or_image) as image:
+            return image.convert("RGB")
diff --git a/nemo_rl/data/packing/algorithms.py b/nemo_rl/data/packing/algorithms.py
index a0eab88f0f..08cd5bcce6 100644
--- a/nemo_rl/data/packing/algorithms.py
+++ b/nemo_rl/data/packing/algorithms.py
@@ -18,6 +18,7 @@
 import math
 import random
 from abc import ABC, abstractmethod
+from bisect import bisect
 from typing import Dict, List, Optional, Tuple, Type, Union
 
 
@@ -611,6 +612,9 @@ def _pack_implementation(self, sequence_lengths: List[int]) -> List[List[int]]:
 
         # Phase-5: FFD on leftovers
         leftovers = remaining_items  # renamed for clarity
+
+        # Original O(n * m) implementation
+        """
         ffd_bins: List[List[Tuple[int, int]]] = []
         for idx, size in sorted(leftovers, key=lambda x: x[1], reverse=True):
             placed = False
@@ -621,10 +625,31 @@ def _pack_implementation(self, sequence_lengths: List[int]) -> List[List[int]]:
                     break
             if not placed:
                 ffd_bins.append([(idx, size)])
+        """
+
+        # New implementation: O(n log n) comparisons via bisect (list pop(0)/insert still shift elements)
+        ffd_bins: List[List[Tuple[int, int]]] = [[]]
+        ffd_bin_sizes: List[int] = [0]
+        for idx, size in sorted(leftovers, key=lambda x: x[1], reverse=True):
+            # Only the least-filled bin is checked: ffd_bin_sizes stays sorted ascending, so if it cannot take the item no bin can. Items therefore go to the emptiest bin, not the first that fits.
+            if size <= (self.bin_capacity - ffd_bin_sizes[0]):
+                new_bin = ffd_bins.pop(0)
+                new_bin_size = ffd_bin_sizes.pop(0)
+            else:
+                new_bin = []
+                new_bin_size = 0
+
+            new_bin.append((idx, size))
+            new_bin_size += size
+
+            new_idx = bisect(ffd_bin_sizes, new_bin_size)
+            ffd_bins.insert(new_idx, new_bin)
+            ffd_bin_sizes.insert(new_idx, new_bin_size)
+
         bins.extend(ffd_bins)
 
         # Convert to list of index lists (discard sizes)
-        return [[idx for idx, _ in b] for b in bins]
+        return [[idx for idx, _ in b] for b in bins if b]
 
 
 def get_packer(
diff --git a/nemo_rl/data/processors.py b/nemo_rl/data/processors.py
index 235e77c225..b9c4a1253a 100644
--- a/nemo_rl/data/processors.py
+++ b/nemo_rl/data/processors.py
@@ -17,14 +17,16 @@
 from typing import Any, Dict, cast
 
 import torch
-from transformers import PreTrainedTokenizerBase
+from transformers import AutoProcessor, PreTrainedTokenizerBase
 
 from nemo_rl.data.interfaces import (
     DatumSpec,
     LLMMessageLogType,
     TaskDataProcessFnCallable,
     TaskDataSpec,
+    VLMMessageLogType,
 )
+from nemo_rl.data.llm_message_utils import get_formatted_message_log
 
 TokenizerType = PreTrainedTokenizerBase
 
@@ -132,6 +134,56 @@ def helpsteer3_data_processor(
     return output
 
 
+def sft_processor(
+    datum_dict: dict[str, Any],
+    task_data_spec: TaskDataSpec,
+    tokenizer,
+    max_seq_length: int,
+    idx: int,
+    add_bos: bool = True,
+    add_eos: bool = True,
+    add_generation_prompt: bool = False,
+) -> DatumSpec:
+    """Process a datum dictionary for SFT training.
+
+    clevr-cogent samples are reformatted first, then the messages are turned into a
+    message log via `get_formatted_message_log`. Samples longer than
+    `max_seq_length` get their token ids cut short and a zero loss multiplier.
+    """
+    if datum_dict["task_name"] == "clevr-cogent":
+        from nemo_rl.data.datasets.response_datasets.clevr import (
+            format_clevr_cogent_dataset,
+        )
+
+        datum_dict = format_clevr_cogent_dataset(datum_dict)
+
+    message_log = get_formatted_message_log(
+        datum_dict["messages"],
+        tokenizer,
+        task_data_spec,
+        add_bos_token=add_bos,
+        add_eos_token=add_eos,
+        add_generation_prompt=add_generation_prompt,
+        tools=datum_dict.get("tools", None),  # Pass tools from data if present
+    )
+
+    length = sum(len(turn["token_ids"]) for turn in message_log)
+    too_long = length > max_seq_length
+    if too_long:
+        # keep at most 4 token ids per message; the reported length stays untruncated
+        for turn in message_log:
+            keep = min(4, max_seq_length // len(message_log))
+            turn["token_ids"] = turn["token_ids"][:keep]
+
+    return {
+        "message_log": message_log,
+        "length": length,
+        "extra_env_info": None,
+        "loss_multiplier": 0.0 if too_long else 1.0,  # 0.0 masks the sample out
+        "idx": idx,
+    }
+
+
 # Example of a generic math data processor
 def math_data_processor(
     datum_dict: dict[str, Any],
@@ -260,6 +312,151 @@ def math_hf_data_processor(
     return output
 
 
+def vlm_hf_data_processor(
+    datum_dict: dict[str, Any],
+    task_data_spec: TaskDataSpec,
+    processor: AutoProcessor,
+    max_seq_length: int,
+    idx: int,
+) -> DatumSpec:
+    """Process a datum dictionary (directly loaded from response_datasets/<dataset_name>.py) into a DatumSpec for the VLM Environment.
+
+    Raises:
+        ValueError: if the task has no formatter or a content type is unsupported.
+    """
+    from nemo_rl.data.datasets.response_datasets.clevr import (
+        format_clevr_cogent_dataset,
+    )
+    from nemo_rl.data.datasets.response_datasets.geometry3k import (
+        format_geometry3k_dataset,
+    )
+    from nemo_rl.data.datasets.response_datasets.refcoco import format_refcoco_dataset
+    from nemo_rl.data.multimodal_utils import (
+        PackedTensor,
+        get_dim_to_pack_along,
+        get_multimodal_keys_from_processor,
+        resolve_to_image,
+    )
+
+    # depending on the task, format the data differently
+    if datum_dict["task_name"] == "clevr-cogent":
+        datum_dict = format_clevr_cogent_dataset(datum_dict)
+    elif datum_dict["task_name"] == "refcoco":
+        datum_dict = format_refcoco_dataset(datum_dict)
+    elif datum_dict["task_name"] == "geometry3k":
+        datum_dict = format_geometry3k_dataset(datum_dict)
+    else:
+        raise ValueError(f"No data processor for task {datum_dict['task_name']}")
+
+    messages = datum_dict["messages"]
+    problem = messages[0]["content"]
+    extra_env_info = {"ground_truth": messages[1]["content"]}
+
+    message_log: VLMMessageLogType = []
+    ### only one round of interaction is assumed, this can easily be extended to a conversational setting
+    user_message: dict[str, Any] = {"role": "user", "content": []}
+    images = []
+    if isinstance(problem, list):
+        for content in problem:
+            # images are appended as-is; text is formatted with the task prompt (if any)
+            if content["type"] == "image":
+                user_message["content"].append(content)
+                images.append(content["image"])
+            elif content["type"] == "text":
+                user_message["content"].append(
+                    {
+                        "type": "text",
+                        "text": task_data_spec.prompt.format(content["text"])
+                        if task_data_spec.prompt
+                        else content["text"],
+                    }
+                )
+            else:
+                raise ValueError(f"Unsupported content type: {content['type']}")
+    else:
+        # text-only message; as in the list branch, no prompt means no formatting
+        prompt = task_data_spec.prompt
+        user_message["content"] = prompt.format(problem) if prompt else problem
+
+    images = [resolve_to_image(image) for image in images]
+
+    # get formatted user message
+    if hasattr(processor, "conversation_preprocessor"):
+        user_message_for_chat_template = processor.conversation_preprocessor(
+            user_message
+        )
+    else:
+        user_message_for_chat_template = user_message
+
+    # this is the string-tokenized conversation template for the generation policy (for vllm)
+    string_formatted_dialog = processor.apply_chat_template(
+        [user_message_for_chat_template],
+        tokenize=False,
+        add_generation_prompt=True,
+    )
+
+    # this is the id-tokenized and image processed conversation template for the policy
+    message: dict = processor.apply_chat_template(
+        [user_message],
+        tokenize=True,
+        add_generation_prompt=True,
+        return_tensors="pt",
+        return_dict=True,
+    )
+
+    # add this for backward compatibility
+    user_message["token_ids"] = message["input_ids"][0]
+    # add all keys and values to the user message, and the list of keys
+    multimodal_keys = get_multimodal_keys_from_processor(processor)
+    for key in multimodal_keys:
+        if key in message:
+            user_message[key] = PackedTensor(
+                message[key], dim_to_pack=get_dim_to_pack_along(processor, key)
+            )
+
+    # specifically for gemma, we need to add token_type_ids to the user message as a sequence-type value
+    if "token_type_ids" in message:
+        user_message["token_type_ids"] = message["token_type_ids"][0]
+
+    message_log.append(user_message)
+
+    length = sum(len(m["token_ids"]) for m in message_log)
+    loss_multiplier = 1.0
+    if length >= max_seq_length:
+        # Treat truncated messages as text only
+        vllm_kwargs = {
+            "vllm_content": None,
+            "vllm_images": [],
+        }
+
+        # make smaller and mask out
+        for chat_message in message_log:
+            chat_message["token_ids"] = chat_message["token_ids"][
+                : min(4, max_seq_length // len(message_log))
+            ]
+            for key, value in chat_message.items():
+                if isinstance(value, PackedTensor):
+                    chat_message[key] = PackedTensor.empty_like(value)
+        loss_multiplier = 0.0
+    else:
+        # vllm backend needs the formatted dialog string and the list of images
+        vllm_kwargs = {
+            "vllm_content": string_formatted_dialog,
+            "vllm_images": images,
+        }
+
+    output: DatumSpec = {
+        "message_log": message_log,
+        "length": length,
+        "extra_env_info": extra_env_info,
+        "loss_multiplier": loss_multiplier,
+        "idx": idx,
+        "task_name": datum_dict["task_name"],
+        **vllm_kwargs,  # pyrefly: ignore[bad-unpacking]
+    }
+    return output
+
+
 def _construct_multichoice_prompt(
     prompt: str, question: str, options: dict[str, str]
 ) -> str:
@@ -291,7 +488,7 @@ def multichoice_qa_processor(
     if "subject" in datum_dict:
         extra_env_info.update({"subject": datum_dict["subject"]})
 
-    message_log = []
+    message_log: LLMMessageLogType = []
 
     # system prompt
     if task_data_spec.system_prompt:
@@ -351,10 +548,12 @@ def multichoice_qa_processor(
     Dict[str, TaskDataProcessFnCallable],
     {
         "default": math_hf_data_processor,
+        "helpsteer3_data_processor": helpsteer3_data_processor,
+        "math_data_processor": math_data_processor,
         "math_hf_data_processor": math_hf_data_processor,
         "multichoice_qa_processor": multichoice_qa_processor,
-        "math_data_processor": math_data_processor,
-        "helpsteer3_data_processor": helpsteer3_data_processor,
+        "sft_processor": sft_processor,
+        "vlm_hf_data_processor": vlm_hf_data_processor,
     },
 )
 
diff --git a/nemo_rl/data/utils.py b/nemo_rl/data/utils.py
new file mode 100644
index 0000000000..40c0463ee5
--- /dev/null
+++ b/nemo_rl/data/utils.py
@@ -0,0 +1,125 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Optional
+
+from datasets import concatenate_datasets
+from transformers import AutoProcessor, AutoTokenizer
+
+from nemo_rl.data import DataConfig
+from nemo_rl.data.datasets import (
+    AllTaskProcessedDataset,
+    extract_necessary_env_names,
+    load_response_dataset,
+    update_single_dataset_config,
+)
+from nemo_rl.environments.interfaces import EnvironmentInterface
+from nemo_rl.environments.utils import create_env
+
+
+def setup_data_with_envs(
+    tokenizer: AutoProcessor | AutoTokenizer,
+    data_config: DataConfig,
+    env_configs: dict[str, Any],
+    is_vlm: bool = False,
+) -> tuple[
+    AllTaskProcessedDataset,
+    Optional[AllTaskProcessedDataset],
+    dict[str, EnvironmentInterface],
+    dict[str, EnvironmentInterface],
+]:
+    """Create the environments and the processed train/validation datasets.
+
+    Environments are created only for the names the data config refers to. The
+    validation data is the concatenation of the part split off the train dataset
+    (if any) and the dataset of `data_config["validation"]` (if configured).
+
+    Args:
+        tokenizer: Tokenizer or processor.
+        data_config: Data config; its dataset sections may be updated in place
+            with entries from `data_config["default"]`.
+        env_configs: Environment configs.
+        is_vlm: Whether to use VLM training or not.
+
+    Returns:
+        A tuple of (train dataset, validation dataset, task to environment, task to validation environment).
+    """
+    assert "train" in data_config, (
+        "The dataset config structure is updated. Please refer to https://github.com/NVIDIA-NeMo/RL/blob/main/docs/guides/grpo.md#dataset "
+        "and the Migrate Guide in https://github.com/NVIDIA-NeMo/RL/pull/1649 to update the dataset config."
+    )
+
+    print("\n▶ Setting up envs...")
+    # with is_vlm set, every environment is created under the name "vlm"
+    envs = {
+        env_name: create_env(
+            env_name="vlm" if is_vlm else env_name, env_config=env_configs[env_name]
+        )
+        for env_name in extract_necessary_env_names(data_config)
+    }
+
+    print("\n▶ Setting up data...")
+    # setup train dataset
+    if "default" in data_config:
+        update_single_dataset_config(data_config["train"], data_config["default"])
+    train = load_response_dataset(data_config["train"])
+    task_data_processors = {train.task_name: (train.task_spec, train.processor)}
+    task_to_env = {train.task_name: envs[data_config["train"]["env_name"]]}
+
+    dataset = AllTaskProcessedDataset(
+        train.dataset,
+        tokenizer,
+        None,
+        task_data_processors,
+        max_seq_length=data_config["max_input_seq_length"],
+    )
+    print(f"  ✓ Training dataset loaded with {len(dataset)} samples.")
+
+    # setup validation dataset
+    val_data_list = []
+    val_task_data_processors = {}
+    val_task_to_env = {}
+
+    # validation dataset from train dataset (when train dataset's split_validation_size > 0)
+    held_out = getattr(train, "val_dataset", None)
+    if held_out is not None:
+        val_data_list.append(held_out)
+        val_task_data_processors = dict(task_data_processors)
+        val_task_to_env = dict(task_to_env)
+
+    # validation dataset from config
+    if "validation" in data_config and data_config["validation"] is not None:
+        val_config = data_config["validation"]
+        if "default" in data_config:
+            update_single_dataset_config(val_config, data_config["default"])
+        val_data = load_response_dataset(val_config)
+        val_name = val_data.task_name
+        val_data_list.append(val_data.dataset)
+        val_task_data_processors[val_name] = (val_data.task_spec, val_data.processor)
+        val_task_to_env[val_name] = envs[val_config["env_name"]]
+
+    # merge all validation sources; stays None when there are none
+    val_dataset = None
+    if val_data_list:
+        merged_val_data = concatenate_datasets(val_data_list)
+        val_dataset = AllTaskProcessedDataset(
+            merged_val_data,
+            tokenizer,
+            None,
+            val_task_data_processors,
+            max_seq_length=data_config["max_input_seq_length"],
+        )
+        print(f"  ✓ Validation dataset loaded with {len(val_dataset)} samples.")
+
+    return dataset, val_dataset, task_to_env, val_task_to_env
diff --git a/nemo_rl/distributed/ray_actor_environment_registry.py b/nemo_rl/distributed/ray_actor_environment_registry.py
index 7b91ed1b3b..90d52fe76e 100644
--- a/nemo_rl/distributed/ray_actor_environment_registry.py
+++ b/nemo_rl/distributed/ray_actor_environment_registry.py
@@ -20,6 +20,9 @@
 VLLM_EXECUTABLE = (
     PY_EXECUTABLES.SYSTEM if USE_SYSTEM_EXECUTABLE else PY_EXECUTABLES.VLLM
 )
+SGLANG_EXECUTABLE = (
+    PY_EXECUTABLES.SYSTEM if USE_SYSTEM_EXECUTABLE else PY_EXECUTABLES.SGLANG
+)
 MCORE_EXECUTABLE = (
     PY_EXECUTABLES.SYSTEM if USE_SYSTEM_EXECUTABLE else PY_EXECUTABLES.MCORE
 )
@@ -27,6 +30,7 @@
 ACTOR_ENVIRONMENT_REGISTRY: dict[str, str] = {
     "nemo_rl.models.generation.vllm.vllm_worker.VllmGenerationWorker": VLLM_EXECUTABLE,
     "nemo_rl.models.generation.vllm.vllm_worker_async.VllmAsyncGenerationWorker": VLLM_EXECUTABLE,
+    "nemo_rl.models.generation.sglang.sglang_worker.SGLangGenerationWorker": SGLANG_EXECUTABLE,
     "nemo_rl.models.policy.workers.dtensor_policy_worker.DTensorPolicyWorker": PY_EXECUTABLES.FSDP,
     "nemo_rl.models.policy.workers.dtensor_policy_worker_v2.DTensorPolicyWorkerV2": PY_EXECUTABLES.AUTOMODEL,
     "nemo_rl.models.policy.workers.megatron_policy_worker.MegatronPolicyWorker": MCORE_EXECUTABLE,
diff --git a/nemo_rl/distributed/virtual_cluster.py b/nemo_rl/distributed/virtual_cluster.py
index fa631ff8ee..96282ad623 100644
--- a/nemo_rl/distributed/virtual_cluster.py
+++ b/nemo_rl/distributed/virtual_cluster.py
@@ -61,6 +61,9 @@ class PY_EXECUTABLES:
     # Use NeMo-Gym dependencies
     NEMO_GYM = f"uv run --locked --extra nemo_gym --directory {git_root}"
 
+    # Use NeMo-RL direct dependencies and SGLang.
+    SGLANG = f"uv run --locked --extra sglang --directory {git_root}"
+
 
 @ray.remote  # pragma: no cover
 def _get_node_ip_and_free_port() -> tuple[str, int]:
diff --git a/nemo_rl/environments/nemo_gym.py b/nemo_rl/environments/nemo_gym.py
index da47ff5184..5ec15c3cef 100644
--- a/nemo_rl/environments/nemo_gym.py
+++ b/nemo_rl/environments/nemo_gym.py
@@ -148,6 +148,10 @@ async def run_rollouts(
     def _postprocess_nemo_gym_to_nemo_rl_result(
         self, nemo_gym_result: dict, tokenizer: PreTrainedTokenizerBase
     ) -> dict:
+        assert isinstance(nemo_gym_result, dict), (
+            f"Hit a non-successful response when querying NeMo Gym for rollouts: {nemo_gym_result}"
+        )
+
         nemo_rl_message_log = []
         seen_token_ids: List[int] = []
         for output_item_dict in nemo_gym_result["response"]["output"]:
diff --git a/nemo_rl/environments/utils.py b/nemo_rl/environments/utils.py
index a9e50c67e1..99fe9eda1a 100644
--- a/nemo_rl/environments/utils.py
+++ b/nemo_rl/environments/utils.py
@@ -43,6 +43,9 @@ class EnvRegistryEntry(TypedDict, total=False):
     "code_jaccard": {
         "actor_class_fqn": "nemo_rl.environments.code_jaccard_environment.CodeJaccardEnvironment",
     },
+    "vlm": {
+        "actor_class_fqn": "nemo_rl.environments.vlm_environment.VLMEnvironment",
+    },
 }
 
 
@@ -93,7 +96,7 @@ def chunk_list_to_workers(to_chunk: list[Any], num_workers: int) -> list[list[An
     return chunks
 
 
-def create_env(env_name: str, env_configs: dict) -> EnvironmentInterface:
+def create_env(env_name: str, env_config: dict) -> EnvironmentInterface:
     assert env_name in ENV_REGISTRY, (
         f"Env name {env_name} is not registered in ENV_REGISTRY. Please call register_env() to register the environment."
     )
@@ -104,7 +107,7 @@ def create_env(env_name: str, env_configs: dict) -> EnvironmentInterface:
             "py_executable": get_actor_python_env(actor_class_fqn),
             "env_vars": dict(os.environ),
         }
-    ).remote(env_configs[env_name])
+    ).remote(env_config)
     return env
 
 
diff --git a/3rdparty/Gym-workspace/is_nemo_gym_installed.py b/nemo_rl/models/automodel/__init__.py
similarity index 80%
rename from 3rdparty/Gym-workspace/is_nemo_gym_installed.py
rename to nemo_rl/models/automodel/__init__.py
index 1a7572b077..341a77c5bc 100644
--- a/3rdparty/Gym-workspace/is_nemo_gym_installed.py
+++ b/nemo_rl/models/automodel/__init__.py
@@ -11,11 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-try:
-    from nemo_gym import config_types  # noqa: F401
-
-    INSTALLED = True
-except Exception:
-    INSTALLED = False
-
-print(f"NEMO_GYM {INSTALLED=}")
diff --git a/nemo_rl/models/automodel/config.py b/nemo_rl/models/automodel/config.py
new file mode 100644
index 0000000000..0e434268b7
--- /dev/null
+++ b/nemo_rl/models/automodel/config.py
@@ -0,0 +1,72 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Configuration classes for automodel-based training in NeMo RL."""
+
+from typing import Any, NamedTuple, Optional
+
+import torch
+from nemo_automodel.components._peft.lora import PeftConfig
+
+
+class RuntimeConfig(NamedTuple):
+    """Runtime configuration for model training and inference.
+
+    This contains all validated runtime settings needed for model initialization,
+    parallelization, and training.
+    """
+
+    # Model loading configuration
+    model_class: type  # class whose from_pretrained() is used to build the model
+    model_config: Any  # AutoConfig
+    hf_config_overrides: dict[str, Any]  # extra kwargs given to AutoConfig
+
+    # Attention configuration
+    allow_flash_attn_args: bool  # False for DeciLMForCausalLM / "nemotron-nas"
+    attn_impl: Optional[str]  # "flash_attention_2", "sdpa", or None
+
+    # Training/inference settings
+    dtype: torch.dtype  # parsed from the policy config's "precision" string
+    enable_seq_packing: bool  # config["sequence_packing"]["enabled"]
+    max_grad_norm: float  # config["max_grad_norm"]
+
+    # Memory management
+    cpu_offload: bool  # config["dtensor_cfg"]["cpu_offload"]
+    offload_optimizer_for_logprob: bool  # False when absent from the config
+
+    # Generation configuration
+    is_generation_colocated: Optional[bool]  # None when no generation config is set
+
+    # Reward model flag
+    is_reward_model: bool  # reward_model_cfg section present and enabled
+
+
+class ModelAndOptimizerState(NamedTuple):
+    """Container for model and optimizer state.
+
+    This named tuple holds all model-related state including the model itself,
+    optimizer, scheduler, and metadata about the model type and configuration.
+    """
+
+    model: torch.nn.Module
+    model_state_dict_keys: list[str]  # captured before the model is parallelized
+    optimizer: Optional[torch.optim.Optimizer]  # None when init_optimizer is False
+    scheduler: Optional[Any]  # None whenever optimizer is None
+    is_hf_model: bool  # architecture is absent from Automodel's ModelRegistry
+    is_moe_model: bool  # some state-dict key contains "expert"
+    is_reward_model: bool
+    model_class: type  # type(model) of the constructed model
+    model_config: Any  # model.config of the constructed model
+    peft_config: Optional[PeftConfig]  # None unless LoRA is enabled
+    autocast_enabled: bool  # False only for custom (non-HF) MoE models
diff --git a/nemo_rl/models/automodel/setup.py b/nemo_rl/models/automodel/setup.py
new file mode 100644
index 0000000000..a017332dba
--- /dev/null
+++ b/nemo_rl/models/automodel/setup.py
@@ -0,0 +1,580 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Setup utilities for automodel-based training in NeMo RL."""
+
+import os
+from typing import Any, Optional
+
+import torch
+from accelerate import init_empty_weights
+from hydra.utils import get_class
+from nemo_automodel import NeMoAutoModelForSequenceClassification
+from nemo_automodel._transformers.registry import ModelRegistry
+from nemo_automodel.components._peft.lora import (
+    PeftConfig,
+    apply_lora_to_linear_modules,
+)
+from nemo_automodel.components.config.loader import _resolve_target
+from nemo_automodel.components.distributed.fsdp2 import FSDP2Manager
+from nemo_automodel.components.distributed.tensor_utils import get_cpu_state_dict
+from nemo_automodel.components.moe.parallelizer import (
+    parallelize_model as moe_parallelize_model,
+)
+from torch.distributed.fsdp import CPUOffloadPolicy, MixedPrecisionPolicy
+from transformers import AutoConfig, AutoProcessor, AutoTokenizer, PreTrainedModel
+from transformers.models.gemma3.modeling_gemma3 import Gemma3ForCausalLM
+
+from nemo_rl.models.automodel.config import ModelAndOptimizerState, RuntimeConfig
+from nemo_rl.models.policy import PolicyConfig
+from nemo_rl.models.policy.utils import configure_dynamo_cache, resolve_model_class
+
+STRING_TO_DTYPE = {
+    "float32": torch.float32,
+    "bfloat16": torch.bfloat16,
+    "float16": torch.float16,
+}
+
+
+def validate_and_prepare_config(
+    config: PolicyConfig,
+    processor: Optional[AutoProcessor],
+    rank: int,
+) -> RuntimeConfig:
+    """Validate configuration and prepare runtime settings.
+
+    This function validates the policy configuration, sets environment variables,
+    determines model configuration, and returns runtime settings as a named tuple.
+
+    Args:
+        config: Policy configuration dictionary
+        processor: Optional processor for multimodal models
+        rank: Current process rank
+
+    Returns:
+        RuntimeConfig named tuple containing validated configuration values
+
+    Raises:
+        ValueError: If configuration is invalid
+        NotImplementedError: If sequence packing is enabled for a reward model
+    """
+    # Set basic configuration
+    is_vlm = processor is not None
+    is_generation_colocated = None
+    if "generation" in config and config["generation"] is not None:
+        is_generation_colocated = config["generation"]["colocated"]["enabled"]
+
+    # Enable NCCL cuMem unless generation is colocated (also when it is unset/None)
+    if not is_generation_colocated:
+        os.environ["NCCL_CUMEM_ENABLE"] = "1"
+
+    # Disable dynamo autotune_local_cache to avoid crash when there's already a cache
+    # with different order of node_bundles
+    configure_dynamo_cache()
+
+    # Parse precision
+    precision = config["precision"]
+    if precision not in STRING_TO_DTYPE:
+        raise ValueError(f"Unknown precision: {precision}")
+    dtype = STRING_TO_DTYPE[precision]
+
+    # Get other configuration values
+    cpu_offload = config["dtensor_cfg"]["cpu_offload"]
+    offload_optimizer_for_logprob = config.get("offload_optimizer_for_logprob", False)
+    max_grad_norm = config["max_grad_norm"]
+    enable_seq_packing = config["sequence_packing"]["enabled"]
+    model_name = config["model_name"]
+
+    # Validate sequence packing
+    if enable_seq_packing:
+        if is_vlm:
+            raise ValueError(
+                "Sequence packing is not supported for VLM models. "
+                "Please set policy.sequence_packing.enabled = False to train VLM models."
+            )
+        print(f"[Rank {rank}] Sequence packing is enabled for model {model_name}")
+        print(f"[Rank {rank}] Using FlashAttention2 for sequence packing")
+
+    # Get HF config overrides
+    hf_config_overrides = config.get("hf_config_overrides", {}) or {}
+
+    # NeMoAutoModelForCausalLM uses flash_attention_2 by default
+    # so we need to set it to None if sequence packing is disabled
+    # See https://github.com/NVIDIA-NeMo/Automodel/blob/7e748be260651349307862426c0c168cebdeeec3/nemo_automodel/components/_transformers/auto_model.py#L180
+    cp_size_cfg = config["dtensor_cfg"].get("context_parallel_size", 1)
+    attn_impl = (
+        "flash_attention_2"
+        if (enable_seq_packing and cp_size_cfg == 1)
+        else ("sdpa" if cp_size_cfg > 1 else None)
+    )
+
+    # Load model config
+    model_config = AutoConfig.from_pretrained(
+        model_name,
+        torch_dtype=torch.float32,  # Always load in float32 for master weights
+        trust_remote_code=True,
+        attn_implementation="flash_attention_2" if enable_seq_packing else None,
+        **hf_config_overrides,
+    )
+
+    # Check if model supports flash attention args
+    allow_flash_attn_args = True
+    if (
+        model_config.architectures[0] == "DeciLMForCausalLM"
+        and model_config.model_type == "nemotron-nas"
+    ):
+        allow_flash_attn_args = False
+
+    # Determine if reward model
+    is_reward_model = (
+        "reward_model_cfg" in config and config["reward_model_cfg"]["enabled"]
+    )
+
+    if is_reward_model:
+        # Validate reward model configuration
+        if enable_seq_packing:
+            raise NotImplementedError(
+                "Sequence packing is not supported for reward models"
+            )
+
+        rm_type = config["reward_model_cfg"]["reward_model_type"]
+        if rm_type == "bradley_terry":
+            model_class = NeMoAutoModelForSequenceClassification
+            if model_config.num_labels != 1:
+                print(
+                    "model_config.num_labels is not 1. Setting it to 1 since this value is used as the out_features "
+                    "for the linear head of Bradley-Terry reward models."
+                )
+                model_config.num_labels = 1
+        else:
+            raise ValueError(f"Unknown reward model type: {rm_type}")
+    else:
+        model_class = resolve_model_class(model_config.model_type)
+
+    # Get parallelization sizes
+    tp_size = config["dtensor_cfg"].get("tensor_parallel_size", 1)
+    cp_size = config["dtensor_cfg"].get("context_parallel_size", 1)
+    sequence_parallel_enabled = config["dtensor_cfg"]["sequence_parallel"]
+
+    # Validate parallelization configuration
+    if cp_size > 1 and enable_seq_packing:
+        raise ValueError(
+            "Context parallel is not supported for sequence packing. "
+            "Refer to https://github.com/NVIDIA/NeMo-RL/blob/main/docs/model-quirks.md#context-parallel-with-fsdp2 for more details."
+        )
+
+    if sequence_parallel_enabled and tp_size == 1:
+        print(
+            "[WARNING]: sequence_parallel=True, but tp_size=1 which has no effect. "
+            "Enable tp_size > 1 to use sequence parallelism."
+        )
+
+    return RuntimeConfig(
+        model_class=model_class,
+        model_config=model_config,
+        hf_config_overrides=hf_config_overrides,
+        allow_flash_attn_args=allow_flash_attn_args,
+        attn_impl=attn_impl,
+        dtype=dtype,
+        enable_seq_packing=enable_seq_packing,
+        max_grad_norm=max_grad_norm,
+        cpu_offload=cpu_offload,
+        offload_optimizer_for_logprob=offload_optimizer_for_logprob,
+        is_generation_colocated=is_generation_colocated,
+        is_reward_model=is_reward_model,
+    )
+
+
+def setup_reference_model_state(
+    model: torch.nn.Module,
+) -> dict[str, torch.Tensor]:
+    """Set up reference model state dict by creating a CPU copy of the model's state dict.
+
+    This creates a reference copy of the model weights on CPU with pinned memory
+    for efficient CPU-GPU transfers. The reference model is typically used to
+    compute reference log probabilities during RL training.
+
+    Args:
+        model: The model to create a reference copy from
+
+    Returns:
+        Dictionary mapping parameter names to CPU tensors with pinned memory
+
+    Example:
+        >>> state = setup_model_and_optimizer(...)
+        >>> reference_model_state_dict = setup_reference_model_state(state.model)
+    """
+    return get_cpu_state_dict(model.state_dict().items(), pin_memory=True)
+
+
+def setup_distributed(
+    config: PolicyConfig,
+    runtime_config: RuntimeConfig,
+) -> FSDP2Manager:
+    """Initialize torch.distributed and build the FSDP2Manager for this worker.
+
+    The process group is created first (NCCL only, or NCCL for CUDA plus gloo
+    for CPU when ``runtime_config.cpu_offload`` is set). The manager is then
+    built from ``config["dtensor_cfg"]`` with pipeline parallelism and
+    data-parallel replication both fixed to 1, and with ``reduce_dtype`` and
+    ``output_dtype`` pinned to float32.
+
+    Args:
+        config: Policy configuration dictionary; only its ``dtensor_cfg``
+            section is read here.
+        runtime_config: RuntimeConfig from validate_and_prepare_config; supplies
+            ``dtype`` (used as the parameter dtype) and ``cpu_offload``.
+
+    Returns:
+        The configured FSDP2Manager. ``setup_model_and_optimizer`` reads its
+        ``device_mesh``, ``moe_mesh``, ``tp_size``, ``cp_size`` and
+        ``sequence_parallel`` attributes.
+    """
+    # Bring up the process group; with CPU offload, gloo is registered for CPU
+    offload_to_cpu = runtime_config.cpu_offload
+    pg_backend = "cuda:nccl,cpu:gloo" if offload_to_cpu else "nccl"
+    torch.distributed.init_process_group(backend=pg_backend)
+    num_ranks = torch.distributed.get_world_size()
+
+    # Every parallelism knob lives in the DTensor section of the policy config
+    dtensor_cfg = config["dtensor_cfg"]
+    parallel_sizes = {
+        "dp_size": dtensor_cfg.get("data_parallel_size", None),
+        "tp_size": dtensor_cfg.get("tensor_parallel_size", 1),
+        "cp_size": dtensor_cfg.get("context_parallel_size", 1),
+        "ep_size": dtensor_cfg.get("expert_parallel_size", 1),
+    }
+    use_sequence_parallel = dtensor_cfg["sequence_parallel"]
+
+    # Parameters use the configured dtype; reduce/output dtypes stay in float32
+    precision_policy = MixedPrecisionPolicy(
+        param_dtype=runtime_config.dtype,
+        reduce_dtype=torch.float32,
+        output_dtype=torch.float32,
+    )
+    offload_policy = CPUOffloadPolicy(pin_memory=False) if offload_to_cpu else None
+
+    # Pipeline parallelism and DP replication are not used here (both fixed to 1)
+    fsdp_manager = FSDP2Manager(
+        **parallel_sizes,
+        dp_replicate_size=1,
+        pp_size=1,
+        sequence_parallel=use_sequence_parallel,
+        use_hf_tp_plan=dtensor_cfg.get("use_hf_tp_plan", False),
+        mp_policy=precision_policy,
+        offload_policy=offload_policy,
+        backend="nccl",
+        world_size=num_ranks,
+        activation_checkpointing=dtensor_cfg["activation_checkpointing"],
+        custom_tp_plan=dtensor_cfg.get("custom_parallel_plan", None),
+        defer_fsdp_grad_sync=dtensor_cfg.get("defer_fsdp_grad_sync", True),
+    )
+
+    # FSDP2Manager skips its own distributed setup at world size 1, so force it
+    if num_ranks == 1:
+        fsdp_manager._setup_distributed()
+    return fsdp_manager
+
+
+def setup_model_and_optimizer(
+    config: PolicyConfig,
+    tokenizer: AutoTokenizer,
+    runtime_config: RuntimeConfig,
+    distributed_manager: FSDP2Manager,
+    checkpoint_manager: Any,
+    is_vlm: bool = False,
+    init_optimizer: bool = True,
+    weights_path: Optional[str] = None,
+    optimizer_path: Optional[str] = None,
+) -> ModelAndOptimizerState:
+    """Set up model, parallelization, and optimizer.
+
+    Creates the model from config, applies parallelization strategies (FSDP2, TP, CP),
+    loads base weights, and optionally initializes optimizer and scheduler.
+
+    Args:
+        config: Policy configuration dictionary (read only; it is not mutated here)
+        tokenizer: Tokenizer for the model
+        runtime_config: RuntimeConfig named tuple from validate_and_prepare_config
+        distributed_manager: FSDP2Manager from setup_distributed
+        checkpoint_manager: Checkpoint manager for loading/saving weights
+        is_vlm: Whether this is a vision-language model
+        init_optimizer: Whether to initialize optimizer
+        weights_path: Optional path to checkpoint weights to load
+        optimizer_path: Optional path to optimizer state to load
+
+    Returns:
+        ModelAndOptimizerState containing model, optimizer, scheduler, and metadata
+
+    Note:
+        The function handles special cases for:
+        - MoE models (uses custom parallelization)
+        - LoRA (applies adapter layers)
+        - Context parallel validation
+        - Tied word embeddings
+    """
+    # Extract configuration values
+    model_config = runtime_config.model_config
+    model_class = runtime_config.model_class
+    attn_impl = runtime_config.attn_impl
+    hf_config_overrides = runtime_config.hf_config_overrides
+    cpu_offload = runtime_config.cpu_offload
+    is_reward_model = runtime_config.is_reward_model
+
+    # Extract distributed configuration from manager
+    rank = torch.distributed.get_rank()
+    device_mesh = distributed_manager.device_mesh
+    moe_mesh = distributed_manager.moe_mesh
+    tp_size = distributed_manager.tp_size
+    cp_size = distributed_manager.cp_size
+    sequence_parallel_enabled = distributed_manager.sequence_parallel
+
+    model_name = config["model_name"]
+
+    # LoRA configuration
+    lora_cfg = config["dtensor_cfg"].get("lora_cfg", None)
+    peft_config = None
+    lora_enabled = lora_cfg is not None and lora_cfg["enabled"]
+    if lora_enabled:
+        if tp_size > 1:
+            assert not lora_cfg["use_triton"], (
+                "Triton is not supported when tensor_parallel_size > 1"
+            )
+        # Always use float32 since FSDP requires all parameters to be in the same dtype
+        cfg_dict_with_dtype = {**lora_cfg, "lora_dtype": "torch.float32"}
+        peft_config = PeftConfig.from_dict(cfg_dict_with_dtype)
+
+    print(f"[Rank {rank}] Initializing empty model for FSDP...")
+
+    # Prepare automodel kwargs. Work on shallow copies so the caller's config is
+    # not mutated (popping `_target_` in place would break a second setup call).
+    automodel_kwargs = dict(config["dtensor_cfg"].get("automodel_kwargs", {}))
+    if automodel_kwargs.get("backend", None) is not None:
+        backend_kwargs = dict(automodel_kwargs["backend"])
+        # `_target_` names the backend class; the remaining keys are its kwargs
+        backend_class = _resolve_target(backend_kwargs.pop("_target_"))
+        backend = backend_class(**backend_kwargs)
+        automodel_kwargs["backend"] = backend
+
+    # Liger kernels stay off unless the config explicitly asks for them
+    if "use_liger_kernel" not in automodel_kwargs:
+        automodel_kwargs["use_liger_kernel"] = False
+
+    # Determine SDPA method for activation checkpointing and CP
+    from torch.nn.attention import SDPBackend
+
+    if cp_size > 1:
+        # Match Automodel's `get_train_context` in `cp_utils.py` where only
+        # flash and efficient backends are supported
+        sdpa_method = [
+            SDPBackend.FLASH_ATTENTION,
+            SDPBackend.EFFICIENT_ATTENTION,
+        ]
+    elif config["dtensor_cfg"]["activation_checkpointing"]:
+        # For activation checkpointing, we must disable the cudnn SDPA backend because
+        # it may not be selected during recomputation.
+        # In that case, we will get the following error:
+        # "Recomputed values have different metadata than during forward pass."
+        sdpa_method = [
+            SDPBackend.FLASH_ATTENTION,
+            SDPBackend.EFFICIENT_ATTENTION,
+            SDPBackend.MATH,
+        ]
+    else:
+        sdpa_method = None
+
+    # Initialize empty model
+    with init_empty_weights():
+        model = model_class.from_pretrained(
+            model_name,
+            attn_implementation=attn_impl,
+            torch_dtype=str(model_config.torch_dtype),
+            trust_remote_code=True,
+            config=model_config,
+            sdpa_method=sdpa_method,
+            **automodel_kwargs,
+        )
+        if lora_enabled:
+            apply_lora_to_linear_modules(model, peft_config)
+
+    # For activation checkpointing, we also must globally disable the cudnn SDPA backend
+    # to ensure that cudnn does not get selected during recomputation.
+    if config["dtensor_cfg"]["activation_checkpointing"]:
+        from torch.backends import cuda
+
+        cuda.enable_cudnn_sdp(False)
+
+    # Store original state dict keys
+    model_state_dict_keys = list(model.state_dict().keys())
+
+    # Set pad token ID if needed
+    if model.config.pad_token_id is None:
+        model.config.pad_token_id = tokenizer.pad_token_id
+
+    # Validate CP configuration with model type
+    if cp_size > 1:
+        if isinstance(model, Gemma3ForCausalLM):
+            raise AssertionError(
+                "Context parallel is not supported for Gemma3ForCausalLM. "
+                "Torch context parallel has many limitations. "
+                "Please refer to https://github.com/NVIDIA/NeMo-RL/blob/main/docs/model-quirks.md#context-parallel-with-fsdp2 for more details."
+            )
+
+        if tp_size > 1 and sequence_parallel_enabled:
+            raise AssertionError(
+                "It's a known issue that context parallel can't be used together with sequence parallel in DTensor worker. "
+                "Please either set cp_size = 1 or disable sequence parallel. "
+                "See https://github.com/NVIDIA-NeMo/RL/issues/659 for more details."
+            )
+
+        if is_vlm:
+            raise AssertionError(
+                "Context parallel is yet not supported for VLM models. Please set cp_size = 1 to train VLM models."
+            )
+
+    # Parallelize model
+    is_moe_model = any("expert" in key for key in model_state_dict_keys)
+    is_hf_model = (
+        model_config.architectures[0] not in ModelRegistry.model_arch_name_to_cls
+    )
+    # Autocast is disabled for custom MoE models (non-HF) to avoid numerical issues
+    autocast_enabled = not (is_moe_model and not is_hf_model)
+
+    if not isinstance(model, PreTrainedModel) and is_moe_model and not is_hf_model:
+        assert tp_size == 1, (
+            f"Using custom implementation {model.__class__.__name__} for MoE model {model_name} which doesn't support tp_size > 1. "
+            "Please use expert_parallel_size > 1 for custom implementation or set force_hf=True in your config at policy->dtensor_cfg->automodel_kwargs to use the HuggingFace implementation."
+        )
+        assert cp_size == 1, (
+            f"Using custom implementation {model.__class__.__name__} for MoE model {model_name} which doesn't support cp_size > 1. "
+            "Please set force_hf=True in your config at policy->dtensor_cfg->automodel_kwargs to use the HuggingFace implementation."
+        )
+        moe_parallelize_model(
+            model=model,
+            world_mesh=device_mesh,
+            moe_mesh=moe_mesh,
+            pp_enabled=False,
+            dp_axis_names=(
+                ("dp_replicate", "dp_shard_cp")
+                if "dp_replicate" in device_mesh.mesh_dim_names
+                and "dp_shard_cp" in device_mesh.mesh_dim_names
+                else ("dp_shard_cp",)
+            ),
+            cp_axis_name="cp",
+            tp_axis_name="tp",
+            ep_axis_name="ep",
+            ep_shard_axis_names=("ep_shard",),
+        )
+    else:
+        model = distributed_manager.parallelize(model)
+
+    print(model)
+
+    # Set model state dict keys in checkpoint manager
+    checkpoint_manager.set_model_state_dict_keys(model_state_dict_keys)
+
+    # Load base HF weights
+    checkpoint_manager.load_base_model(
+        model,
+        model_name=model_name,
+        hf_cache_dir=hf_config_overrides.get("cache_dir", None),
+        dequantize_base_checkpoint=config.get("dequantize_base_checkpoint", False),
+        peft_init_method=peft_config.lora_A_init if peft_config is not None else None,
+    )
+
+    # Handle tied word embeddings
+    is_tied_lm_head = hasattr(model, "lm_head") and getattr(
+        getattr(model, "config", {}), "tie_word_embeddings", False
+    )
+    if is_tied_lm_head:
+        embed_tokens_weight = None
+        for name, param in model.named_parameters():
+            if "embed_tokens" in name and name.endswith(".weight"):
+                embed_tokens_weight = param
+                break
+
+        if embed_tokens_weight is not None:
+            model.lm_head.weight = embed_tokens_weight
+
+    # CPU offload if needed
+    if cpu_offload:
+        # Move buffers to CPU for FSDP modules
+        for v in model.buffers():
+            v.data = v.data.to("cpu")
+        model = model.to("cpu")
+
+    # Initialize optimizer
+    optimizer = None
+    if init_optimizer:
+        optimizer_cls = get_class(config["optimizer"]["name"])
+        optimizer = optimizer_cls(model.parameters(), **config["optimizer"]["kwargs"])
+
+    # Initialize scheduler
+    scheduler = None
+    if "scheduler" in config and optimizer is not None:
+        if isinstance(config["scheduler"], dict):
+            scheduler_cls = get_class(config["scheduler"]["name"])
+            scheduler = scheduler_cls(optimizer, **config["scheduler"]["kwargs"])
+        else:
+            schedulers = []
+            milestones: list[int] = []  # stays empty if no entry supplies milestones
+            for scheduler_cfg in config["scheduler"]:
+                if "name" in scheduler_cfg:
+                    schedulers.append(
+                        get_class(scheduler_cfg["name"])(
+                            optimizer, **scheduler_cfg["kwargs"]
+                        )
+                    )
+                else:
+                    assert "milestones" in scheduler_cfg, (
+                        "unknown scheduler config: ",
+                        scheduler_cfg,
+                    )
+                    milestones = scheduler_cfg["milestones"]
+            scheduler = torch.optim.lr_scheduler.SequentialLR(
+                optimizer, schedulers, milestones
+            )
+    elif optimizer is not None:
+        # Default to passthrough LR schedule
+        scheduler = torch.optim.lr_scheduler.LambdaLR(
+            optimizer, lr_lambda=lambda epoch: 1
+        )
+
+    # Load checkpoint if provided
+    if weights_path:
+        checkpoint_manager.load_checkpoint(
+            model=model,
+            weights_path=weights_path,
+            optimizer=optimizer,
+            optimizer_path=optimizer_path,
+            scheduler=scheduler,
+        )
+    else:
+        print(
+            "No weights path provided. Loaded base HF weights via Checkpointer (default policy init)"
+        )
+
+    return ModelAndOptimizerState(
+        model=model,
+        model_state_dict_keys=model_state_dict_keys,
+        optimizer=optimizer,
+        scheduler=scheduler,
+        is_hf_model=is_hf_model,
+        is_moe_model=is_moe_model,
+        is_reward_model=is_reward_model,
+        model_class=type(model),
+        model_config=model.config,
+        peft_config=peft_config,
+        autocast_enabled=autocast_enabled,
+    )
diff --git a/nemo_rl/models/generation/interfaces.py b/nemo_rl/models/generation/interfaces.py
index d134027bdf..80f4ced95e 100644
--- a/nemo_rl/models/generation/interfaces.py
+++ b/nemo_rl/models/generation/interfaces.py
@@ -257,3 +257,22 @@ def update_weights_from_collective(self) -> list[ray.ObjectRef]:
     # (e.g., vLLM prefix/KV caches) after weight updates.
     def invalidate_kv_cache(self) -> bool:
         return False
+
+    def clear_logger_metrics(self) -> None:
+        """Clear logger metrics for performance reporting.
+
+        This is an optional method that backends can implement to clear
+        telemetry metrics. Default implementation does nothing.
+        """
+        pass  # intentional no-op default
+
+    def get_logger_metrics(self) -> dict[str, Any]:
+        """Get logger metrics for performance reporting.
+
+        This is an optional method that backends can implement to collect
+        telemetry metrics. Default implementation returns empty dict.
+
+        Returns:
+            Dictionary of metrics. Format may vary by backend.
+        """
+        return {}  # a fresh empty dict on every call
diff --git a/nemo_rl/models/generation/sglang/__init__.py b/nemo_rl/models/generation/sglang/__init__.py
new file mode 100644
index 0000000000..4073c3884b
--- /dev/null
+++ b/nemo_rl/models/generation/sglang/__init__.py
@@ -0,0 +1,20 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from nemo_rl.models.generation.sglang.config import SGLangConfig
+from nemo_rl.models.generation.sglang.sglang_generation import SGLangGeneration
+
+__all__ = [
+    "SGLangConfig",
+    "SGLangGeneration",
+]
diff --git a/nemo_rl/models/generation/sglang/config.py b/nemo_rl/models/generation/sglang/config.py
new file mode 100644
index 0000000000..9e1ea45253
--- /dev/null
+++ b/nemo_rl/models/generation/sglang/config.py
@@ -0,0 +1,98 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, NotRequired, TypedDict
+
+from nemo_rl.models.generation.interfaces import GenerationConfig
+
+
+class SglangSpecificArgs(TypedDict):
+    """SGLang-specific configuration arguments (every key is declared optional).
+
+    Most fields below map directly to SGLang's ServerArgs (see:
+    https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/server_args.py).
+    """
+
+    model_path: NotRequired[str]
+    gpus_per_server: NotRequired[int]  # SGLangGeneration raises ValueError if unset
+    random_seed: NotRequired[int]
+    skip_tokenizer_init: NotRequired[bool]
+    disable_cuda_graph: NotRequired[bool]
+    disable_radix_cache: NotRequired[bool]
+    disable_cuda_graph_padding: NotRequired[bool]
+    enable_nccl_nvls: NotRequired[bool]
+    disable_outlines_disk_cache: NotRequired[bool]
+    disable_custom_all_reduce: NotRequired[bool]
+    disable_overlap_schedule: NotRequired[bool]
+    enable_mixed_chunk: NotRequired[bool]
+    enable_dp_attention: NotRequired[bool]
+    enable_ep_moe: NotRequired[bool]
+    enable_torch_compile: NotRequired[bool]
+    torch_compile_max_bs: NotRequired[int]
+    cuda_graph_max_bs: NotRequired[int | None]
+    cuda_graph_bs: NotRequired[list[int] | None]
+    torchao_config: NotRequired[str]
+    enable_nan_detection: NotRequired[bool]
+    enable_p2p_check: NotRequired[bool]
+    triton_attention_reduce_in_fp32: NotRequired[bool]
+    triton_attention_num_kv_splits: NotRequired[int]
+    num_continuous_decode_steps: NotRequired[int]
+    enable_memory_saver: NotRequired[bool]
+    allow_auto_truncate: NotRequired[bool]
+    attention_backend: NotRequired[str | None]
+    enable_multimodal: NotRequired[bool]
+    sampling_backend: NotRequired[str | None]
+    context_length: NotRequired[int | None]
+    mem_fraction_static: NotRequired[float | None]
+    max_running_requests: NotRequired[int | None]
+    chunked_prefill_size: NotRequired[int | None]
+    max_prefill_tokens: NotRequired[int]
+    schedule_policy: NotRequired[str]
+    schedule_conservativeness: NotRequired[float]
+    cpu_offload_gb: NotRequired[int]
+    dtype: NotRequired[str]
+    kv_cache_dtype: NotRequired[str]
+    dp_size: NotRequired[int]  # only used for dp attention
+    pp_size: NotRequired[int]  # pipeline parallel size
+    ep_size: NotRequired[int]
+    # LoRA
+    enable_lora: NotRequired[bool | None]
+    max_lora_rank: NotRequired[int | None]
+    lora_target_modules: NotRequired[list[str] | None]
+    lora_paths: NotRequired[list[str] | None]
+    max_loaded_loras: NotRequired[int]
+    max_loras_per_batch: NotRequired[int]
+    lora_backend: NotRequired[str]
+    # Logging
+    log_level: NotRequired[str]
+    log_level_http: NotRequired[str | None]
+    log_requests: NotRequired[bool]
+    log_requests_level: NotRequired[int]
+    show_time_cost: NotRequired[bool]
+    enable_metrics: NotRequired[bool]  # Exports Prometheus-like metrics
+    # The interval (in decoding iterations) at which to log throughput
+    # and update Prometheus metrics
+    decode_log_interval: NotRequired[int]
+    # Extra loader arguments
+    enable_multithread_load: NotRequired[bool]
+    enable_fast_load: NotRequired[bool]
+    # Server warmup
+    skip_server_warmup: NotRequired[bool]
+
+
+class SGLangConfig(GenerationConfig):
+    """Generation config extended with the SGLang runtime settings."""
+
+    sglang_cfg: SglangSpecificArgs  # required; read by SGLangGeneration and the worker
+    sglang_kwargs: NotRequired[dict[str, Any]]  # optional; untyped key/value mapping
diff --git a/nemo_rl/models/generation/sglang/sglang_copied_utils.py b/nemo_rl/models/generation/sglang/sglang_copied_utils.py
new file mode 100644
index 0000000000..aa9eafea01
--- /dev/null
+++ b/nemo_rl/models/generation/sglang/sglang_copied_utils.py
@@ -0,0 +1,186 @@
+# Copyright 2023-2024 SGLang Team
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Standalone utility functions copied from the SGLang project.
+
+This module contains utility functions that were originally part of the SGLang
+repository (https://github.com/sgl-project/sglang). They have been copied here
+to avoid requiring sglang as a runtime dependency for weight refitting functionality.
+
+IMPORTANT: This module should NOT contain any imports from the sglang package.
+All functions are standalone and self-contained.
+
+Each function includes a permalink to its original source in the SGLang repository.
+These functions were copied from sglang version 0.5.2.
+"""
+
+import io
+from multiprocessing.reduction import ForkingPickler
+from typing import Callable, Union
+
+import pybase64
+import torch
+from torch.multiprocessing import reductions
+
+
+class MultiprocessingSerializer:  # pragma: no cover
+    """Serialize/deserialize Python objects using ForkingPickler for IPC.
+
+    This class enables serialization of objects (including CUDA tensors with IPC
+    handles) for transfer between processes via HTTP or other mechanisms.
+
+    Original source (sglang v0.5.2):
+    https://github.com/sgl-project/sglang/blob/v0.5.2/python/sglang/srt/utils.py#L589-L623
+    """
+
+    @staticmethod
+    def serialize(obj, output_str: bool = False):
+        """Serialize a Python object using ForkingPickler.
+
+        Args:
+            obj: The object to serialize.
+            output_str (bool): If True, return a base64-encoded string instead of raw bytes.
+
+        Returns:
+            bytes or str: The pickled payload, or its base64 text when output_str is True.
+        """
+        buf = io.BytesIO()
+        ForkingPickler(buf).dump(obj)
+        buf.seek(0)  # rewind so read() returns the whole payload
+        output = buf.read()
+
+        if output_str:
+            # Convert bytes to base64-encoded string
+            output = pybase64.b64encode(output).decode("utf-8")
+
+        return output
+
+    @staticmethod
+    def deserialize(data):
+        """Deserialize a previously serialized object.
+
+        Args:
+            data (bytes or str): The serialized data, optionally base64-encoded.
+
+        Returns:
+            The deserialized Python object.
+        """
+        if isinstance(data, str):
+            # A str is taken to be base64 text (see serialize(output_str=True))
+            data = pybase64.b64decode(data, validate=True)
+
+        return ForkingPickler.loads(data)  # NOTE(security): unpickles; trusted input only
+
+
+def monkey_patch_torch_reductions():  # pragma: no cover
+    """Monkey patch torch multiprocessing reductions to use GPU UUIDs.
+
+    This patch modifies PyTorch's CUDA tensor IPC mechanism to use GPU UUIDs
+    instead of device indices. This enables proper weight transfer between
+    processes that may have different CUDA_VISIBLE_DEVICES configurations.
+
+    The patch is idempotent - calling it multiple times is safe.
+
+    This is a workaround until PyTorch https://github.com/pytorch/pytorch/pull/149248
+    is merged and released.
+
+    Original source (sglang v0.5.2):
+    https://github.com/sgl-project/sglang/blob/v0.5.2/python/sglang/srt/patch_torch.py#L20-L33
+    """
+    if hasattr(reductions, "_reduce_tensor_original"):
+        return  # already patched: the saved originals are present
+
+    reductions._reduce_tensor_original = reductions.reduce_tensor
+    reductions._rebuild_cuda_tensor_original = reductions.rebuild_cuda_tensor
+
+    reductions.reduce_tensor = _reduce_tensor_modified
+    reductions.rebuild_cuda_tensor = _rebuild_cuda_tensor_modified
+
+    reductions.init_reductions()  # NOTE(review): presumably re-registers reducers; confirm
+
+
+# The signature has not been changed for years, and we will not need this when
+# the next version is released, so it looks safe to use a constant.
+# Original source (sglang v0.5.2):
+# https://github.com/sgl-project/sglang/blob/v0.5.2/python/sglang/srt/patch_torch.py#L36
+_REDUCE_TENSOR_ARG_DEVICE_INDEX = 6
+
+
+def _reduce_tensor_modified(*args, **kwargs):  # pragma: no cover
+    """Call the original reduce_tensor, then swap the device-index arg for a GPU UUID.
+
+    Original source (sglang v0.5.2):
+    https://github.com/sgl-project/sglang/blob/v0.5.2/python/sglang/srt/patch_torch.py#L39-L43
+    """
+    output_fn, output_args = reductions._reduce_tensor_original(*args, **kwargs)
+    output_args = _modify_tuple(
+        output_args, _REDUCE_TENSOR_ARG_DEVICE_INDEX, _device_to_uuid
+    )
+    return output_fn, output_args
+
+
+def _rebuild_cuda_tensor_modified(*args):  # pragma: no cover
+    """Resolve the device argument (UUID or index) to an index, then rebuild as usual.
+
+    Original source (sglang v0.5.2):
+    https://github.com/sgl-project/sglang/blob/v0.5.2/python/sglang/srt/patch_torch.py#L46-L48
+    """
+    args = _modify_tuple(args, _REDUCE_TENSOR_ARG_DEVICE_INDEX, _device_from_maybe_uuid)
+    return reductions._rebuild_cuda_tensor_original(*args)
+
+
+def _device_to_uuid(device: int) -> str:  # pragma: no cover
+    """Return the UUID of CUDA device `device`, converted to a string.
+
+    Original source (sglang v0.5.2):
+    https://github.com/sgl-project/sglang/blob/v0.5.2/python/sglang/srt/patch_torch.py#L51-L52
+    """
+    return str(torch.cuda.get_device_properties(device).uuid)
+
+
+def _device_from_maybe_uuid(
+    device_maybe_uuid: Union[int, str],
+) -> int:  # pragma: no cover
+    """Convert a device UUID string or index to a device index.
+
+    Args:
+        device_maybe_uuid: Either an integer device index or a UUID string.
+
+    Returns:
+        The integer device index (an integer input is returned unchanged).
+
+    Raises:
+        Exception: If the UUID matches no visible device, or the argument is neither int nor str.
+
+    Original source (sglang v0.5.2):
+    https://github.com/sgl-project/sglang/blob/v0.5.2/python/sglang/srt/patch_torch.py#L55-L65
+    """
+    if isinstance(device_maybe_uuid, int):
+        return device_maybe_uuid
+
+    if isinstance(device_maybe_uuid, str):
+        for device in range(torch.cuda.device_count()):
+            if str(torch.cuda.get_device_properties(device).uuid) == device_maybe_uuid:
+                return device
+        raise Exception("Invalid device_uuid=" + device_maybe_uuid)
+
+    raise Exception(f"Unknown type: {device_maybe_uuid=}")
+
+
+def _modify_tuple(t, index: int, modifier: Callable):  # pragma: no cover
+    """Return a new tuple equal to `t` except `t[index]` becomes `modifier(t[index])`.
+
+    Original source (sglang v0.5.2):
+    https://github.com/sgl-project/sglang/blob/v0.5.2/python/sglang/srt/patch_torch.py#L68-L69
+    """
+    return *t[:index], modifier(t[index]), *t[index + 1 :]
diff --git a/nemo_rl/models/generation/sglang/sglang_generation.py b/nemo_rl/models/generation/sglang/sglang_generation.py
new file mode 100644
index 0000000000..85122779ee
--- /dev/null
+++ b/nemo_rl/models/generation/sglang/sglang_generation.py
@@ -0,0 +1,384 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import os
+from typing import (
+    Any,
+    Optional,
+    Union,
+)
+
+import numpy as np
+import ray
+
+from nemo_rl.distributed.batched_data_dict import BatchedDataDict, SlicedDataDict
+from nemo_rl.distributed.named_sharding import NamedSharding
+from nemo_rl.distributed.virtual_cluster import RayVirtualCluster
+from nemo_rl.distributed.worker_groups import RayWorkerBuilder, RayWorkerGroup
+from nemo_rl.models.generation.interfaces import (
+    GenerationDatumSpec,
+    GenerationInterface,
+    GenerationOutputSpec,
+)
+from nemo_rl.models.generation.sglang.config import SGLangConfig
+
+# Global thresholds for top_k and top_p validation.
+# While top-k/p are not supported, these values allow for token filtering while the logprobs should be compatible.
+# See https://github.com/NVIDIA-NeMo/RL/issues/69 and https://github.com/NVIDIA-NeMo/RL/issues/237 for more details.
+TOP_K_THRESHOLD = 8000  # Allow top_k >= 8000 (effectively no filtering)
+TOP_P_THRESHOLD = 0.99  # Allow top_p >= 0.99 (close to 1.0)
+
+logger = logging.getLogger(__name__)
+
+
+class SGLangGeneration(GenerationInterface):
+    def __init__(
+        self,
+        cluster: RayVirtualCluster,
+        config: SGLangConfig,
+        name_prefix: str = "sglang_policy",
+        workers_per_node: Optional[Union[int, list[int]]] = None,  # unused in this body
+    ):
+        """Initialize a SGLang policy with distributed workers.
+
+        SGLang server manages TP/PP internally, but we still need to:
+        1. Manage data parallel distribution across multiple servers
+        2. Assign GPU bundles to each server
+
+        Each server will see logical GPUs 0-N (via CUDA_VISIBLE_DEVICES set by Ray),
+        so we just need to tell SGLang how many GPUs to use (tp_size).
+        """
+        # Store config
+        self.cfg = config
+        self.sglang_cfg = config["sglang_cfg"]
+
+        gpus_per_server = self.sglang_cfg.get("gpus_per_server", None)
+        if gpus_per_server is None:
+            raise ValueError("gpus_per_server must be set in SGLangConfig.sglang_cfg.")
+
+        # Calculate number of servers based on available resources
+        total_gpus = cluster.world_size()
+        num_servers = total_gpus // gpus_per_server
+
+        if num_servers == 0:
+            raise ValueError(
+                f"Not enough GPUs. Need at least {gpus_per_server} GPUs per server, "
+                f"but only have {total_gpus} GPUs total."
+            )
+
+        if total_gpus % gpus_per_server != 0:
+            logger.warning(
+                f"[WARNING] Total GPUs ({total_gpus}) is not divisible by GPUs per server ({gpus_per_server}). "
+                f"Will use {num_servers} servers, leaving {total_gpus % gpus_per_server} GPUs unused."
+            )
+
+        self.dp_size = num_servers
+        self.gpus_per_server = gpus_per_server
+
+        # Create sharding annotations
+        # Even though SGLang manages TP internally, we include it in the layout to support
+        # RayWorkerGroup's worker management (which creates one worker per GPU bundle).
+        # The TP dimension becomes a "free axis" in run_all_workers_sharded_data, ensuring
+        # only the primary workers (TP rank 0) are called.
+        total_workers = num_servers * gpus_per_server
+        self.sharding_annotations = NamedSharding(
+            layout=np.arange(total_workers).reshape(num_servers, gpus_per_server),
+            names=["data_parallel", "tensor_parallel"],
+        )
+
+        # Initialize placement groups
+        # For SGLang, we use PACK strategy to keep bundles together
+        # colocated is always at top level, not in sglang_cfg
+        strategy = None if self.cfg["colocated"]["enabled"] else "PACK"
+        cluster._init_placement_groups(
+            strategy=strategy,
+            use_unified_pg=False,  # SGLang servers don't need cross-node model parallelism
+        )
+
+        # Create worker builder for SGLangGenerationWorker
+        worker_cls = (
+            "nemo_rl.models.generation.sglang.sglang_worker.SGLangGenerationWorker"
+        )
+        worker_builder = RayWorkerBuilder(worker_cls, config)
+
+        env_vars = {}
+        global_cvd = os.environ.get("CUDA_VISIBLE_DEVICES", None)
+        if global_cvd:
+            # Explicitly pass CUDA_VISIBLE_DEVICES to workers via env_vars so that
+            # every worker sees the same global value as this driver process.
+            env_vars["CUDA_VISIBLE_DEVICES"] = global_cvd
+
+        # Allocate bundles for each server
+        # Each server gets consecutive bundles
+        bundle_indices_list = self._allocate_bundles_for_servers(
+            cluster, num_servers, gpus_per_server
+        )
+
+        # Create worker group with explicit bundle allocation
+        self.worker_group = RayWorkerGroup(
+            cluster,
+            worker_builder,
+            name_prefix=name_prefix,
+            bundle_indices_list=bundle_indices_list,
+            sharding_annotations=self.sharding_annotations,
+            env_vars=env_vars,
+        )
+
+        # Verify data parallel size matches
+        assert self.dp_size == self.worker_group.dp_size, (
+            f"Data parallel size mismatch. Expected {self.dp_size}, got {self.worker_group.dp_size}"
+        )
+
+        # Used to track the round-robin selection of worker groups for generate_async
+        self.current_generate_dp_shard_idx = 0
+
+    def _allocate_bundles_for_servers(
+        self,
+        cluster: RayVirtualCluster,
+        num_servers: int,
+        gpus_per_server: int,
+    ) -> list[tuple[int, list[int]]]:
+        """Allocate GPU bundles to each SGLang server.
+
+        Each server gets consecutive bundles within the same placement group (node).
+        Ray will automatically set CUDA_VISIBLE_DEVICES so each server sees logical GPUs 0, 1, 2, ..., gpus_per_server-1.
+
+        Args:
+            cluster: The Ray virtual cluster
+            num_servers: Total number of SGLang servers to create
+            gpus_per_server: Number of GPUs each server needs
+
+        Returns:
+            List of (placement_group_idx, [bundle_indices]) tuples, one per server
+        """
+        placement_groups = cluster.get_placement_groups()
+
+        if not placement_groups:
+            raise ValueError("No placement groups available in the cluster")
+
+        bundle_indices_list = []
+
+        # Each server's bundles must be within the same placement group (node)
+        server_idx = 0
+        for pg_idx, pg in enumerate(placement_groups):
+            if pg.bundle_count == 0:
+                continue
+
+            # Whole servers that fit in this placement group (leftover bundles stay unused)
+            num_servers_in_pg = pg.bundle_count // gpus_per_server
+
+            # Allocate servers within this placement group
+            for local_server_idx in range(num_servers_in_pg):
+                if server_idx >= num_servers:
+                    break
+
+                # Calculate which bundles this server gets (consecutive within the PG)
+                start_bundle = local_server_idx * gpus_per_server
+                server_bundles = list(
+                    range(start_bundle, start_bundle + gpus_per_server)
+                )
+
+                # Each server gets a tuple of (placement_group_idx, [local_bundle_indices])
+                bundle_indices_list.append((pg_idx, server_bundles))
+                server_idx += 1
+
+            if server_idx >= num_servers:
+                break
+
+        if len(bundle_indices_list) < num_servers:
+            total_available = sum(
+                pg.bundle_count // gpus_per_server
+                for pg in placement_groups
+                if pg.bundle_count > 0
+            )
+            raise ValueError(
+                f"Not enough bundles to allocate all {num_servers} servers. "
+                f"Only {total_available} servers can be allocated "
+                f"(each server needs {gpus_per_server} GPUs)."
+            )
+
+        return bundle_indices_list
+
+    def init_collective(
+        self, ip: str, port: int, world_size: int, *, train_world_size: int
+    ) -> list[ray.ObjectRef]:
+        """Initialize the collective communication (currently a stub returning no futures).
+
+        TODO: implement this if weight updates via NCCL are needed in the future.
+        """
+        return []
+
+    def generate(
+        self, data: BatchedDataDict[GenerationDatumSpec], greedy: bool = False
+    ) -> BatchedDataDict[GenerationOutputSpec]:
+        """Shard `data` across the SGLang servers, generate, and merge the results."""
+        assert isinstance(data, BatchedDataDict), (
+            f"data must be a BatchedDataDict, got type: {type(data)}"
+        )
+        assert "input_ids" in data and "input_lengths" in data, (
+            "input_ids and input_lengths are required in data for SGLang generation"
+        )
+
+        # Shard the data across the data parallel servers (shards may be uneven)
+        dp_size = self.sharding_annotations.get_axis_size("data_parallel")
+        sharded_data: list[SlicedDataDict] = data.shard_by_batch_size(
+            dp_size, allow_uneven_shards=True
+        )
+        future_bundle = self.worker_group.run_all_workers_sharded_data(
+            "generate",
+            data=sharded_data,
+            in_sharded_axes=["data_parallel"],
+            replicate_on_axes=None,
+            output_is_replicated=None,
+            common_kwargs={"greedy": greedy},
+        )
+
+        # Get results from the workers
+        results = self.worker_group.get_all_worker_results(future_bundle)
+
+        # Combine results from all servers, padding output_ids with the pad token id
+        combined: BatchedDataDict[GenerationOutputSpec] = BatchedDataDict.from_batches(
+            results, pad_value_dict={"output_ids": self.cfg["_pad_token_id"]}
+        )
+
+        # Verify the output has all required fields
+        required_keys = [
+            "output_ids",
+            "generation_lengths",
+            "unpadded_sequence_lengths",
+            "logprobs",
+        ]
+        missing_keys = [key for key in required_keys if key not in combined]
+        if missing_keys:
+            raise ValueError(
+                f"Missing required keys for GenerationOutputSpec: {missing_keys}"
+            )
+
+        return combined
+
+    def prepare_refit_info(self, state_dict_info: dict[str, Any]) -> None:
+        pass  # Intentional no-op: state_dict_info is ignored here.
+
+    def update_weights_via_ipc_zmq(self) -> list[ray.ObjectRef]:
+        return []  # Stub: nothing is scheduled, so there are no futures to wait on.
+
+    def update_weights_from_collective(self) -> list[ray.ObjectRef]:
+        return []  # Stub: nothing is scheduled, so there are no futures to wait on.
+
+    def get_sglang_server_urls(self) -> list[str]:
+        """Get the unique base URLs of all SGLang servers, in first-seen order.
+
+        Returns:
+            List of base URLs (e.g., ["http://localhost:30000", "http://localhost:30001"])
+        """
+        if not self.worker_group or not self.worker_group.workers:
+            raise RuntimeError("Worker group is not initialized")
+
+        # Only the primary worker of each server (TP rank 0) is queried, which is
+        # what run_rank_0_only_axes selects.
+        futures = self.worker_group.run_all_workers_single_data(
+            "get_base_url",
+            run_rank_0_only_axes=["tensor_parallel"],
+        )
+        urls = ray.get(futures)
+        # Drop None and de-duplicate; dict.fromkeys (unlike set) keeps a stable order
+        return list(dict.fromkeys(url for url in urls if url is not None))
+
+    def get_sglang_url_to_gpu_uuids(self) -> dict[str, list[str]]:
+        """Map each SGLang server URL to the GPU UUIDs that server uses.
+
+        Returns:
+            Dict keyed by server base URL whose values are lists of GPU UUIDs,
+            e.g., {"http://localhost:30000": ["GPU-aaa", "GPU-bbb"], ...}
+        """
+        group = self.worker_group
+        if not (group and group.workers):
+            raise RuntimeError("Worker group is not initialized")
+
+        # Query only the primary worker (TP rank 0) of each server: URLs first,
+        # then GPU UUIDs. Both calls are issued before either result is awaited.
+        url_futures = group.run_all_workers_single_data(
+            "get_base_url",
+            run_rank_0_only_axes=["tensor_parallel"],
+        )
+        uuid_futures = group.run_all_workers_single_data(
+            "get_gpu_uuids",
+            run_rank_0_only_axes=["tensor_parallel"],
+        )
+        server_urls = ray.get(url_futures)
+        server_uuids = ray.get(uuid_futures)
+
+        # Pair the results positionally; skip servers missing either value
+        return {
+            url: uuids
+            for url, uuids in zip(server_urls, server_uuids)
+            if url is not None and uuids is not None
+        }
+
+    def prepare_for_generation(self, *args: Any, **kwargs: Any) -> bool:
+        """Currently a no-op (workers are not woken up); reports success."""
+        return True
+
+    def finish_generation(self, *args: Any, **kwargs: Any) -> bool:
+        """Currently a no-op (no sleep or prefix-cache reset); reports success."""
+        return True
+
+    def shutdown(self) -> bool:
+        """Stop every SGLang worker through the worker group; False if that raises."""
+        try:
+            stopped = self.worker_group.shutdown(cleanup_method="shutdown")
+        except Exception as err:
+            logger.error(f"Error during SGLang policy shutdown: {err}")
+            return False
+        return stopped
+
+    def __del__(self) -> None:
+        """Best-effort shutdown when the object is deleted or garbage collected.
+
+        Safety net for callers that forget shutdown(); calling it explicitly is still
+        recommended. Skipped if __init__ failed before creating the worker group.
+        """
+        if getattr(self, "worker_group", None) is not None:
+            self.shutdown()
+
+    def invalidate_kv_cache(self) -> bool:
+        """Flush every SGLang server's KV cache ahead of a weight update.
+
+        The flush is requested only from the primary worker of each server
+        (TP rank 0); workers that report None are ignored.
+
+        Returns:
+            bool: True if every reporting server flushed successfully, False otherwise
+        """
+        try:
+            flush_futures = self.worker_group.run_all_workers_single_data(
+                "invalidate_kv_cache",
+                run_rank_0_only_axes=["tensor_parallel"],
+            )
+            # Drop None entries; all([]) is True, so an empty report list is a success
+            reported = [r for r in ray.get(flush_futures) if r is not None]
+            success = all(reported)
+            if not success:
+                logger.warning(
+                    "[sglang refit] WARNING - Some SGLang server caches failed to flush"
+                )
+            else:
+                logger.info(
+                    "[sglang refit] All SGLang server caches flushed successfully"
+                )
+            return success
+        except Exception as err:
+            logger.error(f"[sglang refit] Error flushing SGLang caches: {err}")
+            return False
diff --git a/nemo_rl/models/generation/sglang/sglang_worker.py b/nemo_rl/models/generation/sglang/sglang_worker.py
new file mode 100644
index 0000000000..6f15cba1fc
--- /dev/null
+++ b/nemo_rl/models/generation/sglang/sglang_worker.py
@@ -0,0 +1,804 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import asyncio
+import logging
+import multiprocessing
+import os
+import time
+from typing import Any, Optional
+
+import aiohttp
+import ray
+import requests
+import torch
+
+from nemo_rl.distributed.batched_data_dict import BatchedDataDict
+from nemo_rl.distributed.virtual_cluster import _get_free_port_local, _get_node_ip_local
+from nemo_rl.distributed.worker_group_utils import get_nsight_config_if_pattern_matches
+from nemo_rl.models.generation.interfaces import (
+    GenerationDatumSpec,
+    GenerationOutputSpec,
+    verify_right_padding,
+)
+from nemo_rl.models.generation.sglang.config import SGLangConfig
+from nemo_rl.models.generation.sglang.utils import AsyncLoopThread
+from nemo_rl.utils.nsys import wrap_with_nvtx_name
+
+logger = logging.getLogger(__name__)
+
+
+def _require_sglang():
+    """Lazily import sglang; return (launch_server, ServerArgs, kill_process_tree)."""
+    try:  # deferred import: test collection keeps working without the optional extra
+        from sglang.srt.entrypoints.http_server import launch_server
+        from sglang.srt.server_args import ServerArgs
+        from sglang.srt.utils import kill_process_tree
+    except ModuleNotFoundError as e:  # pragma: no cover
+        raise ModuleNotFoundError(
+            "Optional dependency `sglang` is required for the SGLang generation backend.\n"
+            "Install it via the project extra (e.g. `uv run --extra sglang ...`) to use "
+            "`SGLangGenerationWorker`."
+        ) from e
+
+    return launch_server, ServerArgs, kill_process_tree
+
+
+@ray.remote(
+    runtime_env={**get_nsight_config_if_pattern_matches("sglang_generation_worker")}
+)  # pragma: no cover
+class SGLangGenerationWorker:
+    def __repr__(self) -> str:
+        """Return the bare class name, used as the actor's prefix in the Ray logs.
+
+        A short, stable prefix makes it easy to tell which worker emitted a log line.
+        """
+        return str(self.__class__.__name__)
+
+    @staticmethod
+    def configure_worker(
+        num_gpus: int | float, bundle_indices: Optional[tuple[int, list[int]]] = None
+    ) -> tuple[dict[str, Any], dict[str, str], dict[str, Any]]:
+        """Build the Ray resources, env vars and init kwargs for one worker.
+
+        The settings are derived from ``bundle_indices``: its presence and length
+        decide the seed, the GPU reservation and the extra ``__init__`` arguments.
+        Workers without bundle indices, and owners of servers spanning several
+        bundles, reserve 0 GPUs through Ray and receive ``fraction_of_gpus``.
+
+        Args:
+            num_gpus: Original GPU allocation for this worker based on the placement group
+            bundle_indices: Tuple of (node_idx, local_bundle_indices) for this server
+
+        Returns:
+            tuple with complete worker configuration:
+              - 'resources': Resource allocation (e.g., num_gpus)
+              - 'env_vars': Environment variables for this worker
+              - 'init_kwargs': Parameters to pass to __init__ of the worker
+        """
+        # Set for every worker, whichever branch below applies
+        env_vars: dict[str, str] = {"RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES": "1"}
+        resources: dict[str, Any] = {"num_gpus": num_gpus}
+        init_kwargs: dict[str, Any] = {}
+
+        # Only workers given bundle indices receive them (and a seed) as init kwargs
+        local_bundle_indices = None
+        if bundle_indices is not None:
+            node_idx = bundle_indices[0]
+            local_bundle_indices = bundle_indices[1]
+            init_kwargs["bundle_indices"] = local_bundle_indices
+
+            # Per-server seed: a node offset plus the server's slot on that node.
+            # The slot is the first bundle index divided by the group size; for a
+            # single-bundle server it is the bundle index itself.
+            group_size = len(local_bundle_indices)
+            first_bundle = local_bundle_indices[0]
+            slot = first_bundle if group_size == 1 else first_bundle // group_size
+            init_kwargs["seed"] = node_idx * 1024 + slot
+
+        # A worker either owns a server (bundle indices given) or is a placeholder
+        # kept for Ray's resource management (no bundle indices). Placeholders and
+        # owners of multi-GPU servers count as part of a parallel group.
+        in_parallel_group = (
+            local_bundle_indices is None or len(local_bundle_indices) > 1
+        )
+
+        if in_parallel_group:
+            # For parallel workers, GPU assignment is managed via base_gpu_id: all
+            # workers see the same global CUDA_VISIBLE_DEVICES but use different
+            # logical GPU ranges, so no GPU is reserved through Ray here and the
+            # original allocation is forwarded as fraction_of_gpus instead.
+            resources["num_gpus"] = 0
+            init_kwargs["fraction_of_gpus"] = num_gpus
+
+        return resources, env_vars, init_kwargs
+
+    def __init__(
+        self,
+        config: SGLangConfig,
+        bundle_indices: Optional[list[int]] = None,
+        fraction_of_gpus: float = 1.0,
+        seed: Optional[int] = None,
+    ):
+        """Initialize a SGLang worker for distributed inference.
+
+        Args:
+            config: Configuration dictionary for the policy
+            bundle_indices: List of local bundle indices for this server.
+                          The length of this list determines tp_size (number of GPUs per server).
+                          Only needed for the first worker in each server group (model owner).
+            fraction_of_gpus: Fraction of GPUs to use for this worker
+            seed: Random seed for initialization, if None, then defaults to the config's seed
+        """
+        self.cfg = config
+        self.is_model_owner = bundle_indices is not None
+        self.global_rank = int(os.environ.get("RANK", "0"))
+        self.sglang_cfg = config["sglang_cfg"]
+
+        # Server-side state exists only on model owners. Define it on every worker so
+        # accessors and shutdown() see None instead of raising AttributeError.
+        self.server_args = None
+        self.base_url = None
+        self.server_process = None
+        self.session = None
+        self.connector = None
+
+        # Dedicated event-loop thread: running the loop in the main thread causes issues.
+        self.async_loop_thread = AsyncLoopThread()
+
+        # temp: max concurrent requests per server; this limit may be removed in the future
+        self.max_concurrent_requests = config.get("max_concurrent_requests", 999999)
+
+        # Only the primary worker (local_rank=0) of a server group starts the SGLang server.
+        if not self.is_model_owner:
+            return
+
+        # `sglang` is an optional dependency; import only when we actually start a server.
+        _, ServerArgs, _ = _require_sglang()
+
+        # tp_size is the number of bundles; the first bundle index is the first GPU id.
+        tp_size = len(bundle_indices)
+        base_gpu_id = bundle_indices[0] if bundle_indices else 0
+
+        # All engines see the same global CUDA_VISIBLE_DEVICES value.
+        global_cvd = os.environ.get("CUDA_VISIBLE_DEVICES", None)
+
+        logger.info(
+            f"[SGLang Server] Rank {self.global_rank}: "
+            f"base_gpu_id={base_gpu_id}, tp_size={tp_size}, "
+            f"bundle_indices={bundle_indices}, global_cvd={global_cvd}"
+        )
+
+        # Get current node IP and a free port for the server
+        node_ip = _get_node_ip_local()
+        free_port = _get_free_port_local()
+
+        # Build SGLang server arguments
+        kwargs = {
+            "model_path": self.sglang_cfg["model_path"],
+            "trust_remote_code": True,
+            "random_seed": seed
+            if seed is not None
+            else self.sglang_cfg.get("random_seed", 1),
+            # Memory settings
+            "enable_memory_saver": self.sglang_cfg["enable_memory_saver"],
+            "gpu_id_step": 1,
+            "base_gpu_id": base_gpu_id,
+            # Parallel settings
+            "tp_size": tp_size,
+            "dp_size": self.sglang_cfg["dp_size"],
+            "pp_size": self.sglang_cfg["pp_size"],
+            "ep_size": self.sglang_cfg["ep_size"],
+            # Skip warmup unless the config says otherwise, to prevent warmup timeout
+            "skip_server_warmup": self.sglang_cfg.get("skip_server_warmup", True),
+            # Server network settings - listen on all interfaces, use the free port we found
+            "host": "0.0.0.0",
+            "port": free_port,
+            "torchao_config": "",
+        }
+
+        for key in [
+            "dtype",
+            "kv_cache_dtype",
+            "context_length",
+            "max_running_requests",
+            "chunked_prefill_size",
+            "max_prefill_tokens",
+            "schedule_policy",
+            "schedule_conservativeness",
+            "cpu_offload_gb",
+            "log_level",
+            "mem_fraction_static",
+            "allow_auto_truncate",
+        ]:
+            if key in self.sglang_cfg:
+                kwargs[key] = self.sglang_cfg[key]
+
+        # Keep server_args and base_url for generate() and _make_request()
+        self.server_args = ServerArgs(**kwargs)
+        self.base_url = f"http://{node_ip}:{free_port}"
+
+        logger.info(
+            f"[SGLang Worker] Rank {self.global_rank} Starting on {self.base_url}, CUDA_VISIBLE_DEVICES: {global_cvd}, base_gpu_id: {base_gpu_id}"
+        )
+
+        self.server_process = self._launch_server_process(self.server_args)
+
+    def get_base_url(self) -> str:
+        """Return the ``http://<node_ip>:<port>`` URL of this worker's SGLang server (assigned in ``__init__`` on model owners)."""
+        return self.base_url
+
+    def invalidate_kv_cache(self) -> bool:
+        """Flush the server's KV cache ahead of a weight update (Megatron-style).
+
+        ``/flush_cache`` is retried with a one-second pause, since it does not
+        answer 200 while requests are pending. Workers without a server report success.
+
+        Returns:
+            bool: True if flush was successful, False otherwise
+        """
+        if not self.is_model_owner:
+            return True
+
+        max_attempts = 60
+        connection_retry_limit = 5
+        flush_url = f"{self.base_url}/flush_cache"
+
+        for attempt in range(max_attempts):
+            flushed = False
+            try:
+                flushed = requests.get(flush_url, timeout=10).status_code == 200
+            except requests.exceptions.ConnectionError:
+                # Server might not be ready yet - tolerated only for the first few attempts
+                if attempt >= connection_retry_limit:
+                    logger.warning(
+                        f"[SGLang Worker] Rank {self.global_rank} Connection failed after "
+                        f"{connection_retry_limit} attempts"
+                    )
+                    return False
+            except Exception as e:
+                # Any other error is retried until the final attempt
+                if attempt == max_attempts - 1:
+                    logger.error(
+                        f"[SGLang Worker] Rank {self.global_rank} Failed to flush cache after "
+                        f"{max_attempts} attempts: {e}"
+                    )
+                    return False
+
+            if flushed:
+                if attempt > 0:
+                    logger.info(
+                        f"[SGLang Worker] Rank {self.global_rank} Cache flushed successfully "
+                        f"(attempt {attempt + 1})"
+                    )
+                return True
+            time.sleep(1)
+
+        # Every attempt was used up without a 200 response
+        logger.error(
+            f"[SGLang Worker] Rank {self.global_rank} Timeout: Cache flush failed after "
+            f"{max_attempts} attempts. Server may have pending requests."
+        )
+        return False
+
+    def get_gpu_uuids(self) -> list[str]:
+        """Collect the UUID of every GPU index this server was configured with.
+
+        The indices are the ``tp_size`` consecutive values starting at
+        ``base_gpu_id``, both read from the stored server arguments.
+
+        Returns:
+            List of GPU UUIDs (e.g., ["GPU-xxxxx", "GPU-yyyyy"])
+        """
+        from nemo_rl.utils.nvml import get_device_uuid
+
+        args = self.server_args
+        first_gpu = args.base_gpu_id
+        return [
+            get_device_uuid(gpu_index)
+            for gpu_index in range(first_gpu, first_gpu + args.tp_size)
+        ]
+
+    def _merge_stop_strings(self, batch_stop_strings):
+        """Combine the config-level stop strings with each sample's own.
+
+        The merge goes through a set, so duplicates are dropped and the order
+        of the returned strings is not guaranteed.
+
+        Args:
+            batch_stop_strings: Per-sample stop strings; each entry may be a
+                string, a list of strings, or empty/None.
+
+        Returns:
+            One entry per sample: a list of the merged stop strings, or None
+            when neither the config nor the sample supplies any.
+        """
+        # Stop strings shared by every sample come from the generation config.
+        config_stops: set[str] = set()
+        shared = self.cfg.get("stop_strings")
+        if shared:
+            config_stops.update(shared)
+
+        def merge_one(sample_ss):
+            # Start from a copy so one sample's additions never reach another.
+            combined = config_stops.copy()
+            if sample_ss and isinstance(sample_ss, str):
+                combined.add(sample_ss)
+            elif sample_ss and isinstance(sample_ss, list):
+                combined.update(sample_ss)
+            return list(combined) if combined else None
+
+        return [merge_one(sample_ss) for sample_ss in batch_stop_strings]
+
+    def _build_sampling_params(
+        self,
+        *,
+        greedy: bool,
+        stop_strings,
+        max_new_tokens: Optional[int] = None,
+        input_len: Optional[int] = None,
+        context_length: Optional[int] = None,
+        sample_index: Optional[int] = None,
+    ) -> dict[str, Any]:
+        """Assemble the ``sampling_params`` dict sent to the SGLang API.
+
+        Args:
+            greedy: Use greedy decoding (temperature 0.0, top_k 1)
+            stop_strings: Not used here; stop strings are attached per sample later
+            max_new_tokens: Overrides the config's max_new_tokens when given
+            input_len: Prompt length of this sample, used together with context_length
+            context_length: Context limit; caps max_new_tokens at context_length - input_len - 1
+            sample_index: 0-indexed sample position (the cap warning is logged for index 0 only)
+
+        Returns:
+            Dictionary of sampling parameters compatible with SGLang API
+        """
+        if greedy:
+            temperature, top_k_val = 0.0, 1
+        else:
+            temperature = self.cfg["temperature"]
+            top_k_cfg = self.cfg.get("top_k")
+            top_k_val = -1 if top_k_cfg is None else top_k_cfg
+
+        requested = (
+            self.cfg["max_new_tokens"] if max_new_tokens is None else max_new_tokens
+        )
+
+        # TODO: check if this is needed
+        final_max_tokens = requested
+        if input_len is not None and context_length is not None:
+            budget = max(0, context_length - input_len - 1)
+            if requested > budget:
+                final_max_tokens = budget
+                if sample_index == 0:
+                    logger.warning(
+                        f"[SGLang Worker] Rank {self.global_rank} Warning: "
+                        f"Sample {sample_index} input length ({input_len}) + max_new_tokens ({requested}) "
+                        f"would exceed context_length ({context_length}). "
+                        f"Reducing max_new_tokens to {final_max_tokens} for this sample."
+                    )
+
+        sampling_params = {
+            "temperature": temperature,
+            "top_p": self.cfg.get("top_p", 1.0),
+            "max_new_tokens": final_max_tokens,
+        }
+        if top_k_val != -1:
+            sampling_params["top_k"] = top_k_val
+
+        stop_token_ids = self.cfg.get("stop_token_ids")
+        if stop_token_ids is not None:
+            sampling_params["stop_token_ids"] = stop_token_ids
+        return sampling_params
+
+    async def _ensure_session(self):
+        """Return the shared aiohttp session, creating it on first use."""
+        if self.session is not None:
+            return self.session
+        # Pool capped at 512 connections (overall and per host); 5 minutes total timeout.
+        self.connector = aiohttp.TCPConnector(limit=512, limit_per_host=512)
+        self.session = aiohttp.ClientSession(
+            connector=self.connector, timeout=aiohttp.ClientTimeout(total=300)
+        )
+        return self.session
+
+    async def _generate_single_sample(
+        self,
+        input_ids: list[int],
+        sampling_params: dict[str, Any],
+        stop_string: Optional[str] = None,
+    ) -> tuple[list[int], list[float]]:
+        """POST one prompt to the server's ``/generate`` endpoint and unpack the reply.
+
+        Args:
+            input_ids: List of input token IDs (without padding)
+            sampling_params: Sampling parameters (temperature, top_p, max_new_tokens, etc.)
+            stop_string: Optional stop string(s) for this sample; sent as
+                ``sampling_params["stop"]``
+
+        Returns:
+            Tuple of (generated_tokens, logprobs):
+                - generated_tokens: List of generated token IDs
+                - logprobs: List of log probabilities for generated tokens
+            Both lists are empty when the reply carries no token logprobs.
+
+        Raises:
+            Exception: Any request/HTTP error is logged and re-raised.
+        """
+        # Note: stop should be in sampling_params, not in payload top level
+        # TODO: double check this
+        if stop_string is not None:
+            # Build a new dict so the caller's sampling_params stays untouched.
+            sampling_params = {**sampling_params, "stop": stop_string}
+
+        payload = {
+            "sampling_params": sampling_params,
+            "return_logprob": True,
+            "input_ids": input_ids,
+        }
+
+        url = f"{self.base_url}/generate"
+        headers = {
+            "Content-Type": "application/json; charset=utf-8",
+        }
+
+        # The HTTP session is shared between requests and created on first use.
+        session = await self._ensure_session()
+
+        try:
+            async with session.post(url, json=payload, headers=headers) as response:
+                response.raise_for_status()
+                result = await response.json()
+        except Exception as e:
+            logger.error(
+                f"[SGLang Worker] Rank {self.global_rank} Request failed for input_len={len(input_ids)}: {e}"
+            )
+            raise
+
+        # Each logprob entry is read as [0] = logprob, [1] = token id.
+        entries = result.get("meta_info", {}).get("output_token_logprobs", [])
+        if not entries:
+            # Fallback: empty if token logprobs not available
+            return [], []
+
+        new_tokens = [entry[1] for entry in entries]
+        new_logprobs = [entry[0] for entry in entries]
+        return new_tokens, new_logprobs
+
+    async def _generate_async(self, tasks):
+        """Execute generation tasks with concurrency control.
+
+        TEMP: Uses a semaphore to limit the number of concurrent requests per server, preventing server overload.
+        A router based solution is preferred in the future.
+        Results follow the order of ``tasks``; if one fails, pending tasks are cancelled and the error propagates.
+        """
+        semaphore = asyncio.Semaphore(self.max_concurrent_requests)
+
+        async def wrap(idx, coro):
+            async with semaphore:
+                return idx, await coro
+
+        pending = [asyncio.ensure_future(wrap(i, t)) for i, t in enumerate(tasks)]
+        results = [None] * len(pending)
+        count = 0
+        try:
+            for fut in asyncio.as_completed(pending):
+                idx, value = await fut
+                results[idx] = value
+                count += 1
+                if count % 50 == 0 or count == len(tasks):
+                    logger.debug(
+                        f"[SGLang Worker] Rank {self.global_rank} Completed {count}/{len(tasks)} tasks"
+                    )
+        finally:
+            # No-op for finished tasks; after a failure this stops in-flight requests.
+            for task in pending:
+                task.cancel()
+        return results
+
+    def _launch_server_process(self, server_args: Any) -> multiprocessing.Process:
+        """Start the SGLang server in a child process and block until it is healthy.
+
+        The ``/health_generate`` endpoint is polled every 2 seconds. The wait ends
+        with ``TimeoutError`` (after killing the process tree) once 300 seconds have
+        elapsed, or with ``RuntimeError`` if the child process exits first.
+        """
+        # Ensure `sglang` is importable when we actually start a server.
+        launch_server, _, kill_process_tree = _require_sglang()
+        p = multiprocessing.Process(target=launch_server, args=(server_args,))
+        p.start()
+
+        # Poll the health endpoint on the base_url stored earlier.
+        health_url = f"{self.base_url}/health_generate"
+        headers = {"Content-Type": "application/json; charset=utf-8"}
+        max_wait_time = 300  # 5 minutes timeout
+        start_time = time.time()
+
+        with requests.Session() as session:
+            while time.time() - start_time <= max_wait_time:
+                try:
+                    response = session.get(health_url, headers=headers, timeout=10)
+                    ready = response.status_code == 200
+                except requests.RequestException:
+                    # Not reachable yet; keep waiting.
+                    ready = False
+                if ready:
+                    logger.info(
+                        f"[SGLang Server] Rank {self.global_rank} Server is ready at {self.base_url}"
+                    )
+                    return p
+                if not p.is_alive():
+                    raise RuntimeError(
+                        f"[SGLang Server] Rank {self.global_rank} Server process terminated unexpectedly."
+                    )
+                time.sleep(2)
+            # Deadline passed without a healthy response.
+            kill_process_tree(p.pid)
+            raise TimeoutError(
+                f"[SGLang Server] Rank {self.global_rank} Server failed to start within {max_wait_time}s"
+            )
+
+    @wrap_with_nvtx_name("sglang_genertion_worker/generate")
+    def generate(
+        self, data: BatchedDataDict[GenerationDatumSpec], greedy: bool = False
+    ) -> BatchedDataDict[GenerationOutputSpec]:
+        """Generate a batch of data using SGLang generation.
+
+        Every sample is submitted to the server as its own request; all requests
+        run concurrently on the worker's dedicated event-loop thread and the
+        replies are assembled into right-padded batch tensors.
+
+        Args:
+            data: BatchedDataDict containing input_ids and input_lengths tensors
+                (optionally "stop_strings", one entry per sample)
+            greedy: Whether to use greedy decoding instead of sampling
+
+        Returns:
+            BatchedDataDict conforming to GenerationOutputSpec:
+                - output_ids: input + generated token IDs with proper padding
+                - logprobs: Log probabilities for tokens
+                  (zero at input and padding positions)
+                - generation_lengths: Lengths of each response
+                - unpadded_sequence_lengths: Lengths of each input + generated sequence
+
+        Raises:
+            ValueError: If ``input_lengths`` is empty while ``input_ids`` is not.
+        """
+        # Handle empty input case
+        if len(data["input_ids"]) == 0:
+            return BatchedDataDict[GenerationOutputSpec](
+                {
+                    "output_ids": torch.zeros((0, 0), dtype=torch.long),
+                    "logprobs": torch.zeros((0, 0), dtype=torch.float),
+                    "generation_lengths": torch.zeros(0, dtype=torch.long),
+                    "unpadded_sequence_lengths": torch.zeros(0, dtype=torch.long),
+                }
+            )
+
+        input_ids = data["input_ids"]
+        input_lengths = data["input_lengths"]
+        batch_stop_strings = data.get("stop_strings", [None] * len(input_lengths))
+        stop_strings = self._merge_stop_strings(batch_stop_strings)
+        batch_size = len(input_lengths)
+        pad_token_id = self.cfg["_pad_token_id"]
+
+        # Verify inputs have correct padding
+        verify_right_padding(data, pad_value=pad_token_id)
+
+        # Original input length with padding
+        padded_input_length = input_ids.size(1)
+
+        logger.debug(
+            f"[SGLang Worker] Rank {self.global_rank} batch_size: {batch_size}, padded_input_length: {padded_input_length}"
+        )
+
+        # Defensive guard: input_ids is non-empty here, so input_lengths must be too.
+        if batch_size == 0:
+            raise ValueError("Empty batch received")
+
+        context_length = self.sglang_cfg.get("context_length", None)
+
+        # Read each sample's unpadded input length once; it is needed both for
+        # building the requests and for laying out the outputs.
+        sample_input_lens = [input_lengths[i].item() for i in range(batch_size)]
+
+        # Create async tasks for all samples
+        tasks = []
+        for i, input_len in enumerate(sample_input_lens):
+            # Truncate input if it exceeds context_length. Only the request is
+            # truncated; the output below still carries the full original input.
+            if context_length is not None and input_len >= context_length:
+                input_len = context_length - 1
+
+            valid_input_ids = input_ids[i, :input_len].tolist()
+
+            # Build sampling params for this sample (with context_length adjustment)
+            sample_sampling_params = self._build_sampling_params(
+                greedy=greedy,
+                stop_strings=stop_strings,
+                max_new_tokens=None,
+                input_len=input_len,
+                context_length=context_length,
+                sample_index=i,
+            )
+
+            tasks.append(
+                self._generate_single_sample(
+                    input_ids=valid_input_ids,
+                    sampling_params=sample_sampling_params,
+                    stop_string=stop_strings[i],
+                )
+            )
+
+        # Execute all requests concurrently using the dedicated event loop thread.
+        # Any request failure propagates to the caller unchanged.
+        all_results = self.async_loop_thread.run(self._generate_async(tasks))
+
+        # Per-sample generated length and total (input + generated) length.
+        generation_lengths_list = [len(tokens) for tokens, _ in all_results]
+        unpadded_sequence_lengths_list = [
+            input_len + generation_length
+            for input_len, generation_length in zip(
+                sample_input_lens, generation_lengths_list
+            )
+        ]
+
+        total_generated_tokens = sum(generation_lengths_list)
+        avg_generation_length = total_generated_tokens / batch_size
+
+        # Every row is padded to the longest sequence, but never narrower than the
+        # padded input width.
+        total_length = max(max(unpadded_sequence_lengths_list), padded_input_length)
+
+        # Allocate the batch tensors up front (pad tokens / zero logprobs everywhere),
+        # then fill in each row's input and generated part.
+        output_ids = torch.full(
+            (batch_size, total_length), pad_token_id, dtype=input_ids.dtype
+        )
+        logprobs = torch.zeros((batch_size, total_length), dtype=torch.float32)
+
+        for i, (new_tokens, new_logprobs) in enumerate(all_results):
+            input_len = sample_input_lens[i]
+            output_ids[i, :input_len] = input_ids[i][:input_len]
+
+            # Add generated tokens after the original input
+            if new_tokens:
+                output_ids[i, input_len : input_len + len(new_tokens)] = torch.tensor(
+                    new_tokens, dtype=input_ids.dtype
+                )
+
+            # Logprobs: zeros for input tokens, actual logprobs for generated tokens,
+            # written with one slice assignment instead of a per-token Python loop.
+            if new_logprobs:
+                logprobs[i, input_len : input_len + len(new_logprobs)] = torch.tensor(
+                    new_logprobs, dtype=torch.float32
+                )
+
+        generation_lengths = torch.tensor(generation_lengths_list, dtype=torch.long)
+        unpadded_sequence_lengths = torch.tensor(
+            unpadded_sequence_lengths_list, dtype=torch.long
+        )
+
+        logger.debug(
+            f"[SGLang Worker] Rank {self.global_rank} Generated {total_generated_tokens} tokens across {batch_size} samples (avg: {avg_generation_length:.1f} tokens/sample)"
+        )
+        return BatchedDataDict[GenerationOutputSpec](
+            {
+                "output_ids": output_ids,
+                "generation_lengths": generation_lengths,
+                "unpadded_sequence_lengths": unpadded_sequence_lengths,
+                "logprobs": logprobs,
+            }
+        )
+
+    def sleep(self):
+        """Placeholder: putting the SGLang server to sleep is not implemented yet (TODO)."""
+        return None
+
+    def wake_up(self, **kwargs):
+        """Placeholder: waking the SGLang server is not implemented yet (TODO); kwargs are ignored."""
+        return None
+
+    def shutdown(self) -> bool:
+        """Stop the async loop thread and, on model owners, close the HTTP session and kill the server.
+
+        Returns:
+            bool: False if shutdown raised or the server process is still alive 5 s after the kill; True otherwise
+        """
+        if not self.is_model_owner:
+            if hasattr(self, "async_loop_thread"):
+                try:
+                    self.async_loop_thread.shutdown()
+                    logger.info(
+                        f"[SGLang Worker] Rank {self.global_rank} Async loop thread shut down."
+                    )
+                except Exception as e:
+                    logger.error(
+                        f"[SGLang Worker] Rank {self.global_rank} Error shutting down async loop thread: {e}"
+                    )
+            return True
+
+        try:
+            # Only model owners started a server process; kill_process_tree comes from the optional sglang dependency.
+            _, _, kill_process_tree = _require_sglang()
+            if hasattr(self, "session") and self.session is not None:
+                try:
+
+                    async def close_session():
+                        await self.session.close()
+                        if self.connector is not None:
+                            await self.connector.close()
+
+                    self.async_loop_thread.run(close_session())
+                    logger.info(
+                        f"[SGLang Worker] Rank {self.global_rank} aiohttp session closed."
+                    )
+                except Exception as e:
+                    logger.error(
+                        f"[SGLang Worker] Rank {self.global_rank} Error closing aiohttp session: {e}"
+                    )
+
+            # Stop the loop thread only after the session has been closed on it (best effort: errors are logged)
+            if hasattr(self, "async_loop_thread"):
+                try:
+                    self.async_loop_thread.shutdown()
+                    logger.info(
+                        f"[SGLang Worker] Rank {self.global_rank} Async loop thread shut down."
+                    )
+                except Exception as e:
+                    logger.error(
+                        f"[SGLang Worker] Rank {self.global_rank} Error shutting down async loop thread: {e}"
+                    )
+
+            if not hasattr(self, "server_process") or self.server_process is None:
+                return True
+
+            logger.info(
+                f"[SGLang Worker] Rank {self.global_rank} Shutting down server at {self.base_url}..."
+            )
+
+            if self.server_process.is_alive():
+                kill_process_tree(self.server_process.pid)
+
+            # Give the process up to 5 seconds to terminate; a survivor counts as a failed shutdown
+            self.server_process.join(timeout=5.0)
+
+            if self.server_process.is_alive():
+                return False
+            return True
+
+        except Exception as e:
+            logger.error(
+                f"[SGLang Worker] Rank {self.global_rank} Error during shutdown: {e}"
+            )
+            return False
+
+    def _make_request(self, endpoint: str, payload: Optional[dict] = None):
+        """POST ``payload`` as JSON to ``<base_url>/<endpoint>`` and decode the reply.
+
+        Args:
+            endpoint: The API endpoint to call
+            payload: The JSON payload to send (default: empty dict)
+
+        Returns:
+            The JSON response from the server
+        """
+        response = requests.post(
+            f"{self.base_url}/{endpoint}",
+            json=payload if payload else {},
+            headers={"Content-Type": "application/json; charset=utf-8"},
+            timeout=60,
+        )
+        response.raise_for_status()
+        return response.json()
diff --git a/nemo_rl/models/generation/sglang/utils.py b/nemo_rl/models/generation/sglang/utils.py
new file mode 100644
index 0000000000..7460302b5a
--- /dev/null
+++ b/nemo_rl/models/generation/sglang/utils.py
@@ -0,0 +1,63 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import asyncio
+import threading
+
+
+class AsyncLoopThread:
+    """A background event loop thread for running async operations in Ray actors.
+
+    Runs a dedicated thread with its own event loop so synchronous Ray actor methods
+    can execute coroutines without blocking the main actor thread. Needed because
+    run_coroutine_threadsafe requires the loop to live in a different thread.
+    """
+
+    def __init__(self):
+        self.loop = asyncio.new_event_loop()
+        self._ready = threading.Event()
+        self._thread = threading.Thread(target=self._start_loop, daemon=True)
+        self._thread.start()
+        if not self._ready.wait(timeout=5.0):
+            raise RuntimeError("Event loop thread failed to start within 5 seconds")
+
+    def _start_loop(self):
+        """Run the event loop in the background thread."""
+        asyncio.set_event_loop(self.loop)
+        # Signal readiness from inside the loop, so it is already running when run() checks.
+        self.loop.call_soon(self._ready.set)
+        self.loop.run_forever()
+
+    def run(self, coro):
+        """Schedule a coroutine onto the loop and block until it's done.
+
+        Args:
+            coro: The coroutine to execute
+
+        Returns:
+            The result of the coroutine (raises RuntimeError if the loop is not running)
+        """
+        if not self.loop.is_running():
+            if asyncio.iscoroutine(coro):
+                coro.close()  # otherwise Python warns that it was never awaited
+            raise RuntimeError("Event loop is not running")
+        return asyncio.run_coroutine_threadsafe(coro, self.loop).result()
+
+    def shutdown(self):
+        """Stop the loop and join its thread (2 s); close the loop only if the thread exited."""
+        if self.loop.is_running():
+            self.loop.call_soon_threadsafe(self.loop.stop)
+        self._thread.join(timeout=2.0)
+        if not self._thread.is_alive() and not self.loop.is_closed():
+            self.loop.close()
diff --git a/nemo_rl/models/generation/vllm/vllm_generation.py b/nemo_rl/models/generation/vllm/vllm_generation.py
index 93540ebe82..1366ce28c5 100644
--- a/nemo_rl/models/generation/vllm/vllm_generation.py
+++ b/nemo_rl/models/generation/vllm/vllm_generation.py
@@ -876,6 +876,14 @@ def clear_vllm_logger_metrics(self) -> None:
         )
         ray.get(futures)
 
+    def clear_logger_metrics(self) -> None:
+        """Clear logger metrics for performance reporting (delegates to ``clear_vllm_logger_metrics``)."""
+        self.clear_vllm_logger_metrics()
+
+    def get_logger_metrics(self) -> dict[str, Any]:
+        """Get logger metrics for performance reporting (returns the result of ``get_vllm_logger_metrics``)."""
+        return self.get_vllm_logger_metrics()
+
     def __del__(self) -> None:
         """Shuts down the worker groups when the object is deleted or is garbage collected.
 
diff --git a/nemo_rl/models/megatron/__init__.py b/nemo_rl/models/megatron/__init__.py
index e69de29bb2..4fc25d0d3c 100644
--- a/nemo_rl/models/megatron/__init__.py
+++ b/nemo_rl/models/megatron/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/nemo_rl/models/megatron/config.py b/nemo_rl/models/megatron/config.py
new file mode 100644
index 0000000000..7e249affcd
--- /dev/null
+++ b/nemo_rl/models/megatron/config.py
@@ -0,0 +1,76 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Callable, NamedTuple, Optional, TypedDict
+
+import torch
+from megatron.bridge.training.config import ConfigContainer
+from megatron.bridge.training.state import GlobalState
+from megatron.core.optimizer import MegatronOptimizer
+from megatron.core.optimizer_param_scheduler import OptimizerParamScheduler
+from megatron.core.transformer import MegatronModule
+
+
+class MegatronGenerationConfig(TypedDict):
+    # Total GPU memory (in GB) allocated for KV cache buffers
+    buffer_size_gb: int
+    # Fraction of buffer reserved for guaranteed active requests
+    buffer_guaranteed_fraction: float
+    # Number of CUDA graphs to pre-compile for different batch sizes
+    num_cuda_graphs: int
+    # Size of each KV cache block in tokens (affects memory granularity)
+    block_size_tokens: int
+    # Enable CUDA graphs for prefill/context processing
+    use_cuda_graphs_for_non_decode_steps: bool
+    # Split long prefills into chunks for better memory management
+    enable_chunked_prefill: bool
+    # Unified memory usage level (0=disabled, higher values enable more aggressive paging)
+    unified_memory_level: int
+    # Maximum number of tokens processed in a single step (analogous to vLLM's
+    # max_num_batched_tokens). Setting it too high can cause OOM, so tune it together with
+    # buffer_size_gb; setting it too low limits each step to 512 tokens, which can be slow.
+    max_tokens: int
+
+
+## returned from validate_and_set_config
+class RuntimeConfig(NamedTuple):
+    """Validated runtime settings, as returned by ``validate_and_set_config``.
+
+    Holds the Megatron config container and its model config, the resolved torch dtype,
+    the optimizer offload flags, the generation colocation flag and the padded vocab size.
+    """
+
+    megatron_cfg: ConfigContainer
+    model_cfg: Any
+    dtype: torch.dtype
+    optimizer_cpu_offload: bool
+    offload_optimizer_for_logprob: bool
+    is_generation_colocated: Optional[bool]  # None when no generation config is present
+    final_padded_vocab_size: int
+
+
+## returned from setup_model_and_optimizer
+class ModelAndOptimizerState(NamedTuple):
+    """Container for model and optimizer state.
+
+    Groups the global state, model, optimizer, scheduler, checkpointing context and the
+    param-sync callable. NOTE(review): optimizer/scheduler look to be None when the model
+    is set up with load_optimizer=False - confirm against setup_model_and_optimizer.
+    """
+
+    state: GlobalState
+    model: MegatronModule
+    optimizer: Optional[MegatronOptimizer]
+    scheduler: Optional[OptimizerParamScheduler]
+    checkpointing_context: dict[str, Any]
+    param_sync_func: Optional[Callable]
diff --git a/nemo_rl/models/megatron/setup.py b/nemo_rl/models/megatron/setup.py
new file mode 100644
index 0000000000..57c85ea352
--- /dev/null
+++ b/nemo_rl/models/megatron/setup.py
@@ -0,0 +1,998 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import time
+import warnings
+from typing import Any, Optional, TypeVar
+
+import torch
+from megatron.bridge import AutoBridge
+from megatron.bridge.models.model_provider import get_model
+from megatron.bridge.peft.lora import LoRA
+from megatron.bridge.training import fault_tolerance
+from megatron.bridge.training.checkpointing import (
+    checkpoint_exists,
+    init_checkpointing_context,
+    load_checkpoint,
+)
+from megatron.bridge.training.config import (
+    CheckpointConfig,
+    ConfigContainer,
+    DistributedDataParallelConfig,
+    LoggerConfig,
+    OptimizerConfig,
+    SchedulerConfig,
+    TokenizerConfig,
+    TrainingConfig,
+)
+from megatron.bridge.training.initialize import (
+    initialize_megatron,
+    set_jit_fusion_options,
+)
+from megatron.bridge.training.optim import setup_optimizer
+from megatron.bridge.training.setup import (
+    _create_peft_pre_wrap_hook,
+    _update_model_config_funcs,
+)
+from megatron.bridge.training.state import GlobalState
+from megatron.bridge.training.tokenizers.tokenizer import build_tokenizer
+from megatron.bridge.utils.instantiate_utils import InstantiationMode
+from megatron.bridge.utils.vocab_utils import calculate_padded_vocab_size
+from megatron.core import parallel_state
+from megatron.core.process_groups_config import ProcessGroupCollection
+from megatron.core.transformer import MegatronModule
+from megatron.core.transformer.module import Float16Module
+from megatron.core.transformer.transformer_config import TransformerConfig
+from transformers import PreTrainedTokenizerBase
+
+try:
+    from megatron.core.distributed import (
+        TorchFullyShardedDataParallel as torch_FSDP,  # noqa: F401 unused-import
+    )
+
+    HAVE_FSDP2 = True
+except ImportError:
+    HAVE_FSDP2 = False
+
+from nemo_rl.distributed.named_sharding import NamedSharding
+from nemo_rl.models.megatron.community_import import import_model_from_hf_name
+from nemo_rl.models.megatron.config import ModelAndOptimizerState, RuntimeConfig
+from nemo_rl.models.policy import PolicyConfig
+from nemo_rl.models.policy.utils import (
+    configure_dynamo_cache,
+    get_megatron_checkpoint_dir,
+)
+
+TokenizerType = TypeVar("TokenizerType", bound=PreTrainedTokenizerBase)
+
+
+def destroy_parallel_state():
+    """Safely destroy parallel state and reset async call tracking.
+
+    Called during initialization to clean up temporary distributed state from model
+    import operations. Resetting async call tracking ensures that all ranks start with
+    consistent call_idx values for async checkpointing once the main Megatron context
+    is created. Teardown is best-effort: ordinary errors are ignored, interrupts are not.
+    """
+    if torch.distributed.is_initialized():
+        try:
+            torch.distributed.barrier()
+            torch.distributed.destroy_process_group()
+        except Exception:
+            pass  # Ignore errors if already destroyed
+    if hasattr(parallel_state, "destroy_model_parallel"):
+        try:
+            parallel_state.destroy_model_parallel()
+        except Exception:
+            pass  # Ignore errors if already destroyed
+
+    # Reset async calls queue to prevent call_idx mismatches after distributed context recreation
+    try:
+        import nemo.tron.utils.async_utils as nemo_async_utils
+        from megatron.core.dist_checkpointing.strategies.async_utils import (
+            AsyncCallsQueue,
+        )
+
+        # Clean up any existing async callers first; warn when calls that were never
+        # finalized are about to be discarded by the reset.
+        num_unfinalized = (
+            nemo_async_utils._async_calls_queue.get_num_unfinalized_calls()
+        )
+        if num_unfinalized > 0:
+            print(
+                f"[WARNING] Resetting async calls queue with {num_unfinalized} unfinalized calls"
+            )
+        try:
+            nemo_async_utils._async_calls_queue.close()
+        except Exception:
+            pass  # Ignore errors during cleanup
+        # Reset the global async calls queue by creating a new instance
+        nemo_async_utils._async_calls_queue = AsyncCallsQueue()
+    except ImportError:
+        pass
+
+    # Also reset the Megatron async calls queue if it exists
+    try:
+        import megatron.training.async_utils as megatron_async_utils
+        from megatron.core.dist_checkpointing.strategies.async_utils import (
+            AsyncCallsQueue,
+        )
+
+        # Clean up any existing async callers first
+        old_call_idx = getattr(
+            megatron_async_utils._async_calls_queue, "call_idx", None
+        )
+        num_unfinalized = (
+            megatron_async_utils._async_calls_queue.get_num_unfinalized_calls()
+        )
+        if num_unfinalized > 0:
+            print(
+                f"[WARNING] Resetting Megatron async calls queue with {num_unfinalized} unfinalized calls"
+            )
+        try:
+            megatron_async_utils._async_calls_queue.close()
+        except Exception:
+            pass  # Ignore errors during cleanup
+        # Reset the Megatron global async calls queue as well
+        megatron_async_utils._async_calls_queue = AsyncCallsQueue()
+        print(
+            f"[DEBUG] Reset Megatron async calls queue (old call_idx: {old_call_idx})"
+        )
+    except ImportError:
+        pass
+
+    # Reset the third global async_calls instance in base strategy module
+    try:
+        import megatron.core.dist_checkpointing.strategies.base as base_strategy
+        from megatron.core.dist_checkpointing.strategies.async_utils import (
+            AsyncCallsQueue,
+        )
+
+        # Clean up and reset the global async_calls in base strategy
+        old_call_idx = getattr(base_strategy.async_calls, "call_idx", None)
+        num_unfinalized = base_strategy.async_calls.get_num_unfinalized_calls()
+        if num_unfinalized > 0:
+            print(
+                f"[WARNING] Resetting base strategy async_calls with {num_unfinalized} unfinalized calls"
+            )
+        try:
+            base_strategy.async_calls.close()
+        except Exception:
+            pass  # Ignore errors during cleanup
+        base_strategy.async_calls = AsyncCallsQueue()
+        print(f"[DEBUG] Reset base strategy async_calls (old call_idx: {old_call_idx})")
+    except ImportError:
+        pass
+
+
+def setup_distributed() -> None:
+    """Configure the dynamo cache, clear stale parallel state, and init the NCCL process group."""
+    # Disable dynamo autotune_local_cache to avoid crash when there's already a cache
+    # with different order of node_bundles
+    configure_dynamo_cache()
+    # Ensure clean slate before import
+    destroy_parallel_state()
+    # Need to initialize the process group before calling into Megatron-Bridge, otherwise Megatron-Bridge will try to set an incorrect device
+    torch.distributed.init_process_group("nccl")
+
+
+def validate_and_set_config(
+    config,
+    rank,
+    hf_model_name,
+    pretrained_path,
+    weights_path,
+    tokenizer,
+):
+    """Validate the policy config and return the derived settings as a RuntimeConfig.
+
+    `tokenizer` is part of the signature but is not read by this function.
+    """
+    has_generation = "generation" in config and config["generation"] is not None
+    colocated = config["generation"]["colocated"]["enabled"] if has_generation else None
+
+    # Explicitly set NCCL_CUMEM_ENABLE to 1 to avoid the P2P initialization error for PyNCCLCommunicator.
+    # See https://github.com/NVIDIA-NeMo/RL/issues/564 for more details.
+    if not colocated:
+        os.environ["NCCL_CUMEM_ENABLE"] = "1"
+
+    # Resolve the precision string to a torch dtype (unknown names raise KeyError)
+    torch_dtype = {
+        "float32": torch.float32,
+        "bfloat16": torch.bfloat16,
+        "float16": torch.float16,
+    }[config["precision"]]
+
+    cpu_offload = config["megatron_cfg"]["optimizer"]["optimizer_cpu_offload"]
+    offload_for_logprob = config["offload_optimizer_for_logprob"]
+
+    # Reward models are not yet supported with Megatron.
+    if "reward_model_cfg" in config and config["reward_model_cfg"]["enabled"]:
+        raise NotImplementedError(
+            "Reward models are not yet supported with the Megatron backend, this issue is "
+            "tracked in https://github.com/NVIDIA-NeMo/RL/issues/720"
+        )
+
+    megatron_cfg, model_cfg = setup_model_config(
+        config, rank, torch_dtype, hf_model_name, pretrained_path, weights_path
+    )
+
+    padded_vocab = calculate_padded_vocab_size(
+        megatron_cfg.model.vocab_size,
+        megatron_cfg.model.make_vocab_size_divisible_by,
+        config["megatron_cfg"]["tensor_model_parallel_size"],
+    )
+
+    return RuntimeConfig(
+        megatron_cfg=megatron_cfg,
+        model_cfg=model_cfg,
+        dtype=torch_dtype,
+        optimizer_cpu_offload=cpu_offload,
+        offload_optimizer_for_logprob=offload_for_logprob,
+        is_generation_colocated=colocated,
+        final_padded_vocab_size=padded_vocab,
+    )
+
+
+def validate_model_paths(config: PolicyConfig) -> tuple[str, str, bool]:
+    """Derive the Megatron checkpoint path for the model and check whether it exists.
+
+    Returns (hf_model_name, pretrained_path, pt_checkpoint_exists).
+    """
+    # config["model_name"] may be an HF model name or a path to a local HF checkpoint
+    model_name = config["model_name"]
+    # Names that exist on disk are flattened into a single "model_<path>" directory name
+    if os.path.exists(model_name):
+        subdir = "model_" + model_name.replace("/", "_")
+    else:
+        subdir = model_name
+    ckpt_root = f"{get_megatron_checkpoint_dir()}/{subdir}"
+    iter0_dir = os.path.join(ckpt_root, "iter_0000000")
+    found = os.path.exists(ckpt_root) and os.path.exists(iter0_dir)
+    return model_name, ckpt_root, found
+
+
+def setup_model_config(
+    config: PolicyConfig,
+    rank,
+    dtype,
+    hf_model_name: str,
+    pretrained_path: str,
+    weights_path: Optional[str] = None,
+) -> tuple[ConfigContainer, Any]:
+    """Load the pretrained run config and overlay the policy's Megatron settings on it."""
+    # Locate the run config stored with the converted checkpoint (iter_0000000)
+    pretrained_run_config = os.path.join(
+        pretrained_path, "iter_0000000/run_config.yaml"
+    )
+
+    if not os.path.exists(pretrained_run_config):
+        raise FileNotFoundError(
+            f"Pretrained run config not found at {pretrained_run_config} on rank={rank}. "
+            "This usually means that the one-time HF->mcore conversion on rank=0 saved to a directory "
+            "not being mounted on this node. Please check"
+        )
+
+    try:
+        cfg_from_pretrained = ConfigContainer.from_yaml(
+            pretrained_run_config, mode=InstantiationMode.STRICT
+        )
+    except Exception as e:
+        # Add helpful context as a note to the exception (add_note needs Python 3.11+)
+        e.add_note(
+            f"\n{'=' * 80}\n"
+            f"NOTE: A common cause of this error is when the HF->mcore converted checkpoint is\n"
+            f"created with an older version of megatron-bridge.\n"
+            f"If this checkpoint is old or was generated by a different code version,\n"
+            f"try deleting it and rerunning the code.\n"
+            f"The checkpoint will be automatically regenerated with the current version.\n\n"
+            f"Checkpoint location: {pretrained_path}\n"
+            f"{'=' * 80}"
+        )
+        raise
+
+    model_cfg = cfg_from_pretrained.model
+    cfg_from_pretrained.logger = LoggerConfig()
+
+    # Apply parallelism settings
+    _apply_parallelism_config(model_cfg, config)
+
+    # Apply MoE settings
+    _apply_moe_config(model_cfg, config)
+
+    # Apply precision settings
+    _apply_precision_config(model_cfg, config, dtype)
+
+    # Apply performance settings
+    _apply_performance_config(model_cfg, config)
+
+    # Validate optimizer configuration
+    _validate_optimizer_config(config)
+
+    # Optional layernorm epsilon
+    if "layernorm_epsilon" in config["megatron_cfg"]:
+        model_cfg.layernorm_epsilon = config["megatron_cfg"]["layernorm_epsilon"]
+
+    # Validate chunking configuration
+    _validate_chunking_config(config)
+
+    # Create checkpoint configs
+    checkpoint_config = _create_checkpoint_config(pretrained_path, weights_path)
+
+    # Validate training configuration
+    _validate_training_config(config, model_cfg)
+
+    # Create final megatron config
+    megatron_cfg = _create_megatron_config(
+        model_cfg, checkpoint_config, config, hf_model_name, dtype
+    )
+
+    _validate_dtype_config(dtype, megatron_cfg.model, megatron_cfg.optimizer)
+
+    return megatron_cfg, model_cfg
+
+
+def _apply_parallelism_config(model_cfg: Any, config: PolicyConfig) -> None:
+    """Copy the tensor/pipeline/context parallelism settings onto the model config.
+
+    Asserts that sequence packing is enabled when context parallelism is used.
+    """
+    mcore_cfg = config["megatron_cfg"]
+    # Each key is read from megatron_cfg and set under the same attribute name.
+    for key in (
+        "tensor_model_parallel_size",
+        "pipeline_model_parallel_size",
+        "num_layers_in_first_pipeline_stage",
+        "num_layers_in_last_pipeline_stage",
+        "sequence_parallel",
+        "context_parallel_size",
+    ):
+        setattr(model_cfg, key, mcore_cfg[key])
+
+    if model_cfg.context_parallel_size > 1:
+        assert config["sequence_packing"]["enabled"], (
+            "Sequence Packing must be enabled to use Context Parallelism with MCore"
+        )
+
+
+def _apply_moe_config(model_cfg: Any, config: PolicyConfig) -> None:
+    """Copy the Mixture-of-Experts settings from megatron_cfg onto the model config.
+
+    Each key listed below is read from config["megatron_cfg"] and stored on model_cfg
+    under the same attribute name, in the order listed.
+    """
+    moe_source = config["megatron_cfg"]
+    moe_keys = (
+        # Expert parallelism sizes
+        "expert_tensor_parallel_size",
+        "expert_model_parallel_size",
+
+        # MoE stability settings
+        #
+        # Setting moe_router_dtype to higher precision (e.g. fp64) can improve numerical
+        # stability, especially when using many experts.
+        "moe_router_dtype",
+
+        # The two settings below (and "freeze_moe_router") are used to stabilize moe
+        # training by preventing updates to the moe router. We found that this is helpful
+        # in reducing logprob error during training.
+        #
+        # Set this to "none" to disable load balancing loss.
+        "moe_router_load_balancing_type",
+        # Set this to 0.0 to disable updates to the moe router expert bias
+        "moe_router_bias_update_rate",
+
+        # Dispatcher, overlap and fusion switches
+        "moe_enable_deepep",
+        "moe_token_dispatcher_type",
+        "moe_shared_expert_overlap",
+        "moe_permute_fusion",
+    )
+
+    # Apply in declaration order; a missing key raises KeyError at that entry
+    for moe_key in moe_keys:
+        setattr(model_cfg, moe_key, moe_source[moe_key])
+
+
+def _apply_precision_config(
+    model_cfg: Any, config: PolicyConfig, dtype: torch.dtype
+) -> None:
+    """Set the bf16/fp16 flags, params_dtype and pipeline_dtype on the model config."""
+    str_to_dtype = {
+        "float32": torch.float32,
+        "bfloat16": torch.bfloat16,
+        "float16": torch.float16,
+    }
+    model_cfg.bf16 = dtype == torch.bfloat16
+    model_cfg.fp16 = dtype == torch.float16
+    assert not (model_cfg.fp16 and model_cfg.bf16), (
+        "fp16 and bf16 cannot be used together"
+    )
+    if model_cfg.fp16:
+        model_cfg.params_dtype = torch.float16
+    elif model_cfg.bf16:
+        model_cfg.params_dtype = torch.bfloat16
+    else:
+        model_cfg.params_dtype = torch.float32
+
+    model_cfg.pipeline_dtype = str_to_dtype[config["megatron_cfg"]["pipeline_dtype"]]
+
+
+def _apply_performance_config(model_cfg: Any, config: PolicyConfig) -> None:
+    """Apply recompute, fusion and optional FP8 settings from megatron_cfg to model_cfg."""
+    model_cfg.parallel_output = True
+
+    # Activation checkpointing
+    if config["megatron_cfg"]["activation_checkpointing"]:
+        model_cfg.recompute_granularity = "full"
+        model_cfg.recompute_method = "uniform"
+        model_cfg.recompute_num_layers = 1
+
+    # Activation function validation
+    if not model_cfg.gated_linear_unit:
+        assert model_cfg.activation_func is not None, (
+            "activation_func must be set if not using gated_linear_unit. This likely "
+            "indicates an issue in configuration conversion (e.g. activation func was "
+            "a lambda and couldn't be serialized). This is based on this check "
+            "https://github.com/NVIDIA/Megatron-LM/blob/1ab876ddc4c1893c76f26d775226a8d1dcdfb3d2/megatron/core/transformer/mlp.py#L174."
+        )
+
+    # Fusion settings
+    model_cfg.apply_rope_fusion = config["megatron_cfg"]["apply_rope_fusion"]
+    model_cfg.bias_activation_fusion = config["megatron_cfg"]["bias_activation_fusion"]
+
+    # FP8 configuration (only applied when fp8_cfg is present and enabled)
+    fp8_cfg = config["megatron_cfg"].get("fp8_cfg", None)
+    if fp8_cfg is not None and fp8_cfg.get("enabled", False):
+        try:
+            model_cfg.fp8 = fp8_cfg["fp8"]
+            model_cfg.fp8_recipe = fp8_cfg["fp8_recipe"]
+            model_cfg.fp8_param = fp8_cfg["fp8_param"]
+        except KeyError as e:
+            raise KeyError(f"Missing key in fp8_cfg: {e}") from e  # keep the lookup error chained
+
+        if model_cfg.fp8_param:
+            warnings.warn(
+                "Setting fp8_param=True sometimes causes NaN token_mult_prob_error, please use with caution. "
+                "Refer to https://github.com/NVIDIA-NeMo/RL/issues/1164 for latest updates with this issue."
+            )
+
+
+def _validate_optimizer_config(config: PolicyConfig) -> None:
+    """Check that CPU optimizer offload, when enabled, offloads the whole optimizer."""
+    optim_cfg = config["megatron_cfg"]["optimizer"]
+    cpu_offload_enabled = optim_cfg["optimizer_cpu_offload"]
+    offload_fraction = optim_cfg["optimizer_offload_fraction"]
+    if not cpu_offload_enabled:
+        return
+
+    # Currently, hybrid optimizer (partly on GPU and partly on CPU) is not supported because it conflicts with the way
+    # Nemo-rl handles the optimizer offload/onload between generation and training. So if using CPU optimizer the offload_fraction should be 1.0.
+    assert offload_fraction == 1.0, (
+        "Currently for optimizer offloading, only optimizer_offload_fraction=1.0 is supported"
+    )
+
+
+def _validate_chunking_config(config: PolicyConfig) -> None:
+    """Require defer_fp32_logits whenever a positive logprob_chunk_size is configured."""
+    if "logprob_chunk_size" not in config:
+        return
+    chunk_size = config["logprob_chunk_size"]
+    if chunk_size is None or not chunk_size > 0:
+        return
+    assert config["megatron_cfg"]["defer_fp32_logits"], (
+        "defer_fp32_logits must be True if logprob_chunk_size is set"
+    )
+
+
+def _create_checkpoint_config(
+    pretrained_path: str, weights_path: Optional[str]
+) -> CheckpointConfig:
+    """Build the CheckpointConfig from the weights path and the pretrained checkpoint path."""
+    return CheckpointConfig(
+        save_interval=100,
+        save=weights_path,  # the same path is used for saving ...
+        load=weights_path,  # ... and for loading
+        pretrained_checkpoint=pretrained_path,
+        async_save=False,
+        fully_parallel_save=True,
+        fully_parallel_load=True,
+        load_rng=False,
+    )
+
+
+def _validate_training_config(config: PolicyConfig, model_cfg: Any) -> None:
+    """Check train_iters / MoE aux-loss settings and set the flags nemo-rl relies on."""
+    megatron_section = config["megatron_cfg"]
+    assert "train_iters" in megatron_section, (
+        "train_iters must be set in megatron_cfg. For an example, see "
+        "https://github.com/NVIDIA-NeMo/RL/blob/bccbc377705a81a1f4b3c31ad9767bcc15f735a8/nemo_rl/algorithms/sft.py#L175-L179."
+    )
+
+    ## These settings are required for correct gradient computations in mcore
+    ## when calculate_per_token_loss is True, there is no scaling of the gradient in mcore,
+    ## so we handle the scaling in nemo-rl.
+    ## perform_initialization = True is a workaround to ensure the correct tensor parallel attributes are set
+    ## on the TP-sharded parameters.
+    model_cfg.calculate_per_token_loss = True
+    model_cfg.perform_initialization = True
+
+    # MoE aux loss validation: an aux-loss load-balancing type is only accepted
+    # when its coefficient is zero.
+    uses_aux_loss = "aux_loss" in model_cfg.moe_router_load_balancing_type
+    assert not uses_aux_loss or model_cfg.moe_aux_loss_coeff == 0, (
+        "MoE aux loss is currently not supported due to a known bug in Megatron-LM. "
+        "See https://github.com/NVIDIA/Megatron-LM/issues/1984 for more details."
+    )
+
+
+def _validate_dtype_config(
+    dtype: torch.dtype, model_cfg: Any, optimizer_cfg: Any
+) -> None:
+    # TODO: this validation should happen inside mbridge: https://github.com/NVIDIA-NeMo/Megatron-Bridge/issues/1665
+    if dtype == torch.bfloat16:  # bf16: model flag required; optimizer flag only if precision-aware
+        assert model_cfg.bf16 == True, (
+            "policy.megatron_cfg.model.bf16=True must be set if policy.precision=bfloat16. This is handled by nemo-rl so this indicates something is misconfigured."
+        )
+        assert (
+            optimizer_cfg.use_precision_aware_optimizer == False
+            or optimizer_cfg.bf16 == True
+        ), (
+            "policy.megatron_cfg.optimizer.bf16=True must be set if policy.precision=bfloat16 when using use_precision_aware_optimizer=True"
+        )
+    elif dtype == torch.float16:  # fp16: same rules as bf16, checked against the fp16 flags
+        assert model_cfg.fp16 == True, (
+            "policy.megatron_cfg.model.fp16=True must be set if policy.precision=float16. This is handled by nemo-rl so this indicates something is misconfigured."
+        )
+        assert (
+            optimizer_cfg.use_precision_aware_optimizer == False
+            or optimizer_cfg.fp16 == True
+        ), (
+            "policy.megatron_cfg.optimizer.fp16=True must be set if policy.precision=float16 when using use_precision_aware_optimizer=True"
+        )
+    elif dtype == torch.float32:  # fp32: no half-precision flag may be set on model or optimizer
+        assert model_cfg.bf16 == False and model_cfg.fp16 == False, (
+            "policy.megatron_cfg.model.bf16=False and policy.megatron_cfg.model.fp16=False must be set if policy.precision=float32. This is handled by nemo-rl so this indicates something is misconfigured."
+        )
+        assert optimizer_cfg.bf16 == False and optimizer_cfg.fp16 == False, (
+            "policy.megatron_cfg.optimizer.bf16=False and policy.megatron_cfg.optimizer.fp16=False must be set if policy.precision=float32"
+        )
+
+
+def _create_megatron_config(
+    model_cfg: Any,
+    checkpoint_config: CheckpointConfig,
+    config: PolicyConfig,
+    hf_model_name: str,
+    dtype: torch.dtype,
+) -> ConfigContainer:
+    """Assemble the final Megatron ConfigContainer (`dtype` is currently unused here)."""
+    # Build the sub-configs first, in the order the container lists them
+    logger_cfg = LoggerConfig(logging_level=0)
+    train_cfg = TrainingConfig(
+        micro_batch_size=1,  # ignored
+        global_batch_size=config["train_global_batch_size"],  # ignored
+        train_iters=config["megatron_cfg"]["train_iters"],
+    )
+    mcore_cfg = config["megatron_cfg"]
+    optimizer_cfg = OptimizerConfig(**mcore_cfg["optimizer"])
+    ddp_section = mcore_cfg["distributed_data_parallel_config"]
+    ddp_cfg = DistributedDataParallelConfig(
+        check_for_nan_in_grad=True,
+        grad_reduce_in_fp32=ddp_section["grad_reduce_in_fp32"],
+        overlap_grad_reduce=ddp_section["overlap_grad_reduce"],
+        overlap_param_gather=ddp_section["overlap_param_gather"],
+        # we need to set average_in_collective=False with calculate_per_token_loss=T
+        # otherwise, mcore throws an assertion error.
+        average_in_collective=False,  # Required with calculate_per_token_loss=True
+        use_distributed_optimizer=mcore_cfg["optimizer"]["use_distributed_optimizer"],
+        data_parallel_sharding_strategy=ddp_section["data_parallel_sharding_strategy"],
+    )
+    scheduler_cfg = SchedulerConfig(**mcore_cfg["scheduler"])
+    tokenizer_cfg = TokenizerConfig(
+        tokenizer_type="HuggingFaceTokenizer",
+        tokenizer_model=hf_model_name,
+    )
+
+    return ConfigContainer(
+        model=model_cfg,
+        checkpoint=checkpoint_config,
+        logger=logger_cfg,
+        train=train_cfg,
+        optimizer=optimizer_cfg,
+        ddp=ddp_cfg,
+        scheduler=scheduler_cfg,
+        dataset=None,
+        tokenizer=tokenizer_cfg,
+    )
+
+
+def setup_model_and_optimizer(
+    policy_cfg: PolicyConfig,
+    megatron_cfg: ConfigContainer,
+    load_optimizer: bool = True,
+    get_embedding_ranks=None,  # TODO @sahilj: What is this?
+    get_position_embedding_ranks=None,
+):
+    state = GlobalState()
+    state.cfg = megatron_cfg
+    # TODO: Freeze state.cfg
+
+    megatron_cfg.dist.external_gpu_device_mapping = True
+    initialize_megatron(
+        cfg=megatron_cfg,
+        get_embedding_ranks=get_embedding_ranks,
+        get_position_embedding_ranks=get_position_embedding_ranks,
+    )
+
+    if megatron_cfg.ft and megatron_cfg.ft.enable_ft_package:
+        fault_tolerance.setup(megatron_cfg, state)
+        fault_tolerance.maybe_setup_simulated_fault(megatron_cfg.ft)
+
+    # Set pytorch JIT layer fusion options and warmup JIT functions.
+    set_jit_fusion_options(megatron_cfg.model, megatron_cfg.train.micro_batch_size)
+
+    # Adjust the startup time so it reflects the largest value.
+    # This will be closer to what scheduler will see (outside of
+    # image ... launches.
+    start_time_tensor = torch.tensor(
+        [state.start_time], dtype=torch.double, device="cuda"
+    )
+    torch.distributed.all_reduce(start_time_tensor, op=torch.distributed.ReduceOp.MIN)
+    state.start_time = start_time_tensor.item()
+
+    print(
+        "time to initialize megatron (seconds): {:.3f}".format(
+            time.time() - state.start_time
+        )
+    )
+    torch.distributed.barrier()
+
+    # Context used for persisting some state between checkpoint saves.
+    checkpointing_context = init_checkpointing_context(megatron_cfg.checkpoint)
+
+    # Tokenizer
+    build_tokenizer(
+        megatron_cfg.tokenizer,
+        make_vocab_size_divisible_by=megatron_cfg.model.make_vocab_size_divisible_by
+        // megatron_cfg.model.tensor_model_parallel_size,
+        tensor_model_parallel_size=megatron_cfg.model.tensor_model_parallel_size,
+        trust_remote_code=True,
+    )
+    assert megatron_cfg.model.vocab_size, "vocab size must be specified in model config"
+
+    torch.distributed.barrier()
+
+    pre_wrap_hook = []
+
+    use_peft = policy_cfg["megatron_cfg"].get("peft", {}).get("enabled", False)
+
+    mixed_precision_wrapper = Float16Module
+    if policy_cfg["megatron_cfg"]["freeze_moe_router"]:
+        if use_peft:
+            raise ValueError(
+                "Freezing the MOE router is not currently supported when using PEFT"
+            )
+
+        def freeze_moe_router(megatron_model):
+            if not isinstance(megatron_model, list):
+                megatron_model = [megatron_model]
+            for model_module in megatron_model:
+                # Handle both wrapped (Float16Module) and unwrapped models
+                if isinstance(model_module, Float16Module):
+                    model_module = model_module.module
+                # Handle VLM models
+                if hasattr(model_module, "language_model"):
+                    model_module = model_module.language_model
+                for layer in model_module.decoder.layers:
+                    if hasattr(layer, "mlp") and hasattr(layer.mlp, "router"):
+                        layer.mlp.router.weight.requires_grad = False
+
+        mixed_precision_wrapper = MoEFloat16Module
+        pre_wrap_hook.extend([freeze_moe_router])
+
+    if use_peft:
+        peft_cfg = policy_cfg["megatron_cfg"].get("peft", {})
+        peft = LoRA(
+            target_modules=peft_cfg["target_modules"],
+            exclude_modules=peft_cfg["exclude_modules"],
+            dim=peft_cfg["dim"],
+            alpha=peft_cfg["alpha"],
+            dropout=peft_cfg["dropout"],
+            dropout_position=peft_cfg["dropout_position"],
+            lora_A_init_method=peft_cfg["lora_A_init_method"],
+            lora_B_init_method=peft_cfg["lora_B_init_method"],
+            a2a_experimental=peft_cfg["a2a_experimental"],
+            lora_dtype=peft_cfg["lora_dtype"],
+        )
+    else:
+        peft = None
+    megatron_cfg.peft = peft
+
+    if megatron_cfg.peft is not None:
+        pre_peft_hook = _create_peft_pre_wrap_hook(megatron_cfg, state)
+        megatron_cfg.model.register_pre_wrap_hook(pre_peft_hook)
+
+        def composed_peft_hook(model: list[MegatronModule]) -> list[MegatronModule]:
+            model = pre_peft_hook(model)
+            return model
+
+        pre_wrap_hook.extend([composed_peft_hook])
+
+    # Model, optimizer, and learning rate.
+    pg_collection = ProcessGroupCollection.use_mpu_process_groups()
+    setattr(megatron_cfg.model, "_pg_collection", pg_collection)
+    model = get_model(
+        megatron_cfg.model,
+        megatron_cfg.ddp,
+        use_torch_fsdp2=megatron_cfg.dist.use_torch_fsdp2,
+        overlap_param_gather_with_optimizer_step=megatron_cfg.optimizer.overlap_param_gather_with_optimizer_step,
+        data_parallel_random_init=megatron_cfg.rng.data_parallel_random_init,
+        pre_wrap_hook=pre_wrap_hook,
+        mixed_precision_wrapper=mixed_precision_wrapper,
+        pg_collection=pg_collection,
+    )
+    if load_optimizer:
+        optimizer, scheduler = setup_optimizer(
+            optimizer_config=megatron_cfg.optimizer,
+            scheduler_config=megatron_cfg.scheduler,
+            model=model,
+            use_gloo_process_groups=megatron_cfg.dist.use_gloo_process_groups,
+        )
+    else:
+        optimizer = None
+        scheduler = None
+
+    print("Model, optimizer, and learning rate scheduler built")
+    torch.distributed.barrier()
+
+    if megatron_cfg.peft is not None:
+        should_load_checkpoint = (
+            megatron_cfg.checkpoint.load is not None
+            and checkpoint_exists(megatron_cfg.checkpoint.load)
+        )
+        if should_load_checkpoint:
+            # The finetune toggle is explicitly set to True in order to avoid loading optimizer and RNG states
+            # This is switched off here in order to load these states from the checkpoint
+            megatron_cfg.checkpoint.finetune = False
+    else:
+        should_load_checkpoint = (
+            megatron_cfg.checkpoint.load is not None
+            and checkpoint_exists(megatron_cfg.checkpoint.load)
+        ) or (
+            megatron_cfg.checkpoint.pretrained_checkpoint is not None
+            and checkpoint_exists(megatron_cfg.checkpoint.pretrained_checkpoint)
+        )
+
+    # Load checkpoint if applicable
+    if should_load_checkpoint:
+        load_checkpoint(
+            state,
+            model,
+            optimizer,
+            scheduler,
+            checkpointing_context=checkpointing_context,
+            skip_load_to_model_and_opt=HAVE_FSDP2 and megatron_cfg.dist.use_torch_fsdp2,
+        )
+        print("Checkpoint loaded")
+    torch.distributed.barrier()
+
+    # Set the param sync function for the model
+    param_sync_func = None
+    if megatron_cfg.ddp.overlap_param_gather and megatron_cfg.ddp.align_param_gather:
+        param_sync_func = [model_chunk.start_param_sync for model_chunk in model]
+        if len(model) == 1:
+            param_sync_func = param_sync_func[0]
+
+    # Get the first model from the list
+    model = model[0]
+
+    return ModelAndOptimizerState(
+        state,
+        model,
+        optimizer,
+        scheduler,
+        checkpointing_context,
+        param_sync_func,
+    )
+
+
+def handle_model_import(
+    config: PolicyConfig,
+    hf_model_name: str,
+    pretrained_path: str,
+    pt_checkpoint_exists: bool,
+) -> None:
+    """Handle HF model import if checkpoint doesn't exist."""
+    if pt_checkpoint_exists:
+        print(f"Checkpoint already exists at {pretrained_path}. Skipping import.")
+    else:
+        hf_config_overrides = config.get("hf_config_overrides", {}) or {}
+        import_model_from_hf_name(
+            hf_model_name,
+            pretrained_path,
+            config["megatron_cfg"],
+            **hf_config_overrides,
+        )
+
+        if parallel_state.model_parallel_is_initialized():
+            print("Reinitializing model parallel after loading model state.")
+            parallel_state.destroy_model_parallel()
+
+
+def setup_reference_model_state(
+    config: PolicyConfig, megatron_cfg: ConfigContainer, pretrained_path: str
+) -> dict:
+    """Setup the reference model for inference and return its state dict."""
+    # Create reference checkpoint config
+    ref_checkpoint_config = CheckpointConfig(
+        pretrained_checkpoint=pretrained_path,
+        save=None,
+        load=None,
+        fully_parallel_load=True,
+        load_rng=False,
+    )
+
+    ref_ckpt_context = init_checkpointing_context(ref_checkpoint_config)
+
+    # Create a separate megatron config for the reference model
+    ref_megatron_cfg = ConfigContainer(
+        model=megatron_cfg.model,
+        checkpoint=ref_checkpoint_config,
+        logger=megatron_cfg.logger,
+        train=megatron_cfg.train,
+        optimizer=megatron_cfg.optimizer,
+        ddp=megatron_cfg.ddp,
+        scheduler=megatron_cfg.scheduler,
+        dataset=megatron_cfg.dataset,
+        tokenizer=megatron_cfg.tokenizer,
+    )
+
+    # Create a separate state object for the reference model
+    ref_state = GlobalState()
+    ref_state.cfg = ref_megatron_cfg
+
+    # Configure mixed precision wrapper for reference model
+    ref_mixed_precision_wrapper = Float16Module
+    if config["megatron_cfg"].get("freeze_moe_router", False):
+        ref_mixed_precision_wrapper = MoEFloat16Module
+
+    reference_model = get_model(
+        megatron_cfg.model,
+        megatron_cfg.ddp,
+        use_torch_fsdp2=megatron_cfg.dist.use_torch_fsdp2,
+        overlap_param_gather_with_optimizer_step=megatron_cfg.optimizer.overlap_param_gather_with_optimizer_step,
+        pre_wrap_hook=megatron_cfg.rng.data_parallel_random_init,
+        mixed_precision_wrapper=ref_mixed_precision_wrapper,
+        pg_collection=ProcessGroupCollection.use_mpu_process_groups(),
+    )
+
+    print("Loading the Reference Model")
+    reference_state_dict = {}
+
+    if ref_checkpoint_config.pretrained_checkpoint is not None and checkpoint_exists(
+        ref_checkpoint_config.pretrained_checkpoint
+    ):
+        load_checkpoint(
+            ref_state,
+            reference_model,
+            None,  # no optimizer
+            None,  # no scheduler
+            checkpointing_context=ref_ckpt_context,
+            skip_load_to_model_and_opt=HAVE_FSDP2 and megatron_cfg.dist.use_torch_fsdp2,
+        )
+        reference_model = reference_model[0]
+        reference_model.eval()
+
+        # Store reference state dict on CPU
+        for name, item in reference_model.state_dict().items():
+            if isinstance(item, torch.Tensor):
+                cpu_item = item.detach().to(device="cpu", non_blocking=True, copy=True)
+                del item
+            else:
+                cpu_item = item
+            reference_state_dict[name] = cpu_item
+        print("Reference model loaded")
+    else:
+        print("Reference model not loaded")
+
+    return reference_state_dict
+
+
+def finalize_megatron_setup(
+    config: PolicyConfig,
+    megatron_cfg: ConfigContainer,
+    hf_model_name: str,
+    worker_sharding_annotations: NamedSharding,
+    model,
+    optimizer,
+) -> tuple:
+    """Finalize the setup with remaining configurations.
+
+    Returns:
+        Tuple of (megatron_tokenizer, megatron_bridge, should_disable_forward_pre_hook, dp_size)
+    """
+    _update_model_config_funcs(
+        [model],
+        megatron_cfg.model,
+        megatron_cfg.ddp,
+        optimizer,
+        align_grad_reduce=megatron_cfg.dist.align_grad_reduce,
+        pg_collection=ProcessGroupCollection.use_mpu_process_groups(),
+    )
+
+    tokenizer_config = TokenizerConfig(
+        tokenizer_type="HuggingFaceTokenizer",
+        tokenizer_model=hf_model_name,
+    )
+
+    megatron_tokenizer = build_tokenizer(
+        tokenizer_config,
+        make_vocab_size_divisible_by=megatron_cfg.model.make_vocab_size_divisible_by
+        // config["megatron_cfg"]["tensor_model_parallel_size"],
+        tensor_model_parallel_size=config["megatron_cfg"]["tensor_model_parallel_size"],
+        trust_remote_code=True,
+    )
+
+    dp_size = worker_sharding_annotations.get_axis_size("data_parallel")
+    megatron_bridge = AutoBridge.from_hf_pretrained(
+        hf_model_name, trust_remote_code=True
+    )
+
+    should_disable_forward_pre_hook = (
+        config["megatron_cfg"]["optimizer"]["use_distributed_optimizer"]
+        and config["megatron_cfg"]["distributed_data_parallel_config"][
+            "overlap_param_gather"
+        ]
+    )
+
+    return megatron_tokenizer, megatron_bridge, should_disable_forward_pre_hook, dp_size
+
+
+class MoEFloat16Module(Float16Module):
+    """Float 16 Module with the ability to keep the expert bias in float32.
+
+    Attributes:
+        config (TransformerConfig): Transformer config
+        fp16 (bool) : Specifies if the model runs in fp16 mode
+        bf16 (bool) : Specifies if the model runs in bf16 mode
+
+    Args:
+        config (TransformerConfig): The transformer config used to initalize the model
+    """
+
+    def __init__(self, config: TransformerConfig, module: torch.nn.Module):
+        super(MoEFloat16Module, self).__init__(config, module)
+        self.re_enable_float32_expert_bias()
+
+    def re_enable_float32_expert_bias(self) -> None:
+        """Ensure MoE router expert bias stays in float32 for numerical stability.
+
+        Walks the wrapped module to find MoE routers and invokes the
+        `_maintain_float32_expert_bias()` helper which recreates or casts the
+        expert bias tensors to float32 as required by Megatron-LM.
+        """
+        module = self.module
+        # Handle VLM models where language model is nested
+        if hasattr(module, "language_model"):
+            module = module.language_model
+        if hasattr(module, "decoder") and hasattr(module.decoder, "layers"):
+            for layer in module.decoder.layers:
+                mlp = getattr(layer, "mlp", None)
+                router = getattr(mlp, "router", None) if mlp is not None else None
+                if router is not None and hasattr(
+                    router, "_maintain_float32_expert_bias"
+                ):
+                    router._maintain_float32_expert_bias()
diff --git a/nemo_rl/models/policy/__init__.py b/nemo_rl/models/policy/__init__.py
index 1a934a26d4..363399cbca 100644
--- a/nemo_rl/models/policy/__init__.py
+++ b/nemo_rl/models/policy/__init__.py
@@ -183,6 +183,16 @@ class MegatronConfig(TypedDict):
     # Force overwrite of the initial checkpoint even if it exists (default: False)
     force_overwrite_initial_ckpt: NotRequired[bool]
     moe_per_layer_logging: bool
+    # Set to true to enable DeepEP for expert parallel communication
+    # Must set moe_token_dispatcher_type to 'flex'
+    # Must set moe_shared_expert_overlap to False
+    moe_enable_deepep: bool
+    # The type of token dispatcher to use. The default is 'allgather'.
+    # Options are 'allgather', 'alltoall' and 'flex'
+    # Use 'flex' when using DeepEP
+    moe_token_dispatcher_type: str
+    # Can be used only with 'alltoall' token dispatcher
+    moe_shared_expert_overlap: bool
     optimizer: MegatronOptimizerConfig
     scheduler: MegatronSchedulerConfig
     distributed_data_parallel_config: MegatronDDPConfig
diff --git a/nemo_rl/models/policy/interfaces.py b/nemo_rl/models/policy/interfaces.py
index 144b0c517d..6e64c6289b 100644
--- a/nemo_rl/models/policy/interfaces.py
+++ b/nemo_rl/models/policy/interfaces.py
@@ -182,6 +182,18 @@ def stream_weights_via_ipc_zmq(
     ) -> list[ray.ObjectRef]:
         pass
 
+    def stream_weights_via_http(
+        self, sglang_url_to_gpu_uuids: dict[str, list[str]]
+    ) -> list[ray.ObjectRef]:
+        """Stream model weights to SGLang servers via HTTP API.
+
+        Args:
+            sglang_url_to_gpu_uuids: Dict mapping SGLang server URL to list of GPU UUIDs it uses
+        """
+        raise NotImplementedError(
+            "stream_weights_via_http is not implemented for this policy worker"
+        )
+
     @abstractmethod
     def broadcast_weights_for_collective(
         self, kv_scales: Optional[dict[str, float]] = None
diff --git a/nemo_rl/models/policy/lm_policy.py b/nemo_rl/models/policy/lm_policy.py
index 144683c95c..1f908824fe 100644
--- a/nemo_rl/models/policy/lm_policy.py
+++ b/nemo_rl/models/policy/lm_policy.py
@@ -768,6 +768,20 @@ def stream_weights_via_ipc_zmq(
         )
         return futures
 
+    def stream_weights_via_http(
+        self, sglang_url_to_gpu_uuids: dict[str, list[str]]
+    ) -> list[ray.ObjectRef]:
+        """Send the weights to SGLang servers via HTTP API.
+
+        Args:
+            sglang_url_to_gpu_uuids: Dict mapping SGLang server URL to list of GPU UUIDs it uses
+        """
+        futures = self.worker_group.run_all_workers_single_data(
+            "stream_weights_via_http",
+            sglang_url_to_gpu_uuids=sglang_url_to_gpu_uuids,
+        )
+        return futures
+
     def broadcast_weights_for_collective(
         self, kv_scales: Optional[dict[str, float]] = None
     ) -> list[ray.ObjectRef]:
diff --git a/nemo_rl/models/policy/utils.py b/nemo_rl/models/policy/utils.py
index 7ad33708a2..6061b0f143 100644
--- a/nemo_rl/models/policy/utils.py
+++ b/nemo_rl/models/policy/utils.py
@@ -16,9 +16,11 @@
 import os
 import traceback
 from enum import Enum
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, cast
 
+import requests
 import torch
+import torch.distributed as dist
 import zmq
 from torch.multiprocessing.reductions import rebuild_cuda_tensor
 from transformers import (
@@ -473,3 +475,260 @@ def rebuild_cuda_tensor_from_ipc(
     list_args = list(args)
     list_args[6] = device_id
     return func(*list_args)
+
+
+def stream_weights_via_http_impl(
+    params_generator,
+    sglang_url_to_gpu_uuids: dict[str, list[str]],
+    rank: int,
+    worker_name: str,
+    current_device_uuid: str,
+) -> None:
+    """Stream weights to SGLang servers via HTTP API (update_weights_from_tensor).
+
+    Flow: Each rank creates IPC handler → gather handlers in rank order → send list → SGLang matches by tp_rank index
+
+    Key points:
+    - Every rank serializes each tensor on its own GPU; only the gather-source rank POSTs the gathered list
+    - All parameters are materialized via list(params_generator) before the first one is sent
+    - Raises RuntimeError when no server in sglang_url_to_gpu_uuids lists current_device_uuid
+    - SGLang is expected to pick handler = serialized_handlers[tp_rank] (not verifiable from this file)
+
+    Args:
+        params_generator: Generator yielding (name, tensor) pairs
+        sglang_url_to_gpu_uuids: Dict mapping SGLang server URL to list of GPU UUIDs it uses
+        rank: Rank of this worker; compared with the gather-source rank and used in log messages
+        worker_name: Name of the worker for logging
+        current_device_uuid: UUID of the current training worker's GPU
+    """
+    from nemo_rl.models.generation.sglang.sglang_copied_utils import (
+        MultiprocessingSerializer,
+    )
+
+    print("[sglang refit details] entering stream_weights_via_http_impl")
+
+    target_urls = [
+        url
+        for url, uuids in sglang_url_to_gpu_uuids.items()
+        if current_device_uuid in uuids
+    ]
+
+    if not target_urls:
+        raise RuntimeError(
+            f"{worker_name} (rank {rank}): No matching SGLang server found for GPU UUID {current_device_uuid}. "
+            f"Available servers: {list(sglang_url_to_gpu_uuids.keys())}"
+        )
+
+    if len(target_urls) > 1:
+        print(
+            f"[WARNING] {worker_name} (rank {rank}): GPU UUID {current_device_uuid} matches multiple SGLang servers: {target_urls}. "
+            f"Using the first one: {target_urls[0]}"
+        )
+        target_urls = [target_urls[0]]
+
+    base_url = target_urls[0]
+    url = f"{base_url}/update_weights_from_tensor"
+    sglang_gpu_uuids = sglang_url_to_gpu_uuids[base_url]
+
+    ipc_gather_group, ipc_gather_src, matching_ranks = _setup_ipc_gather_group(
+        rank, current_device_uuid, sglang_gpu_uuids, sglang_url_to_gpu_uuids
+    )
+    print(
+        f"[sglang refit] {worker_name} (rank {rank}): ipc_gather_group={ipc_gather_group}, ipc_gather_src={ipc_gather_src}, matching_ranks={matching_ranks}"
+    )
+    tensor_count = 0
+
+    try:
+        tensor_list = list(params_generator)
+        total_tensors = len(tensor_list)
+
+        if rank == ipc_gather_src:
+            print(
+                f"[sglang refit details] {worker_name}: Starting weight update - "
+                f"Total parameters to update: {total_tensors}",
+                flush=True,
+            )
+
+        for idx, (name, tensor) in enumerate(tensor_list):
+            torch.cuda.current_stream().synchronize()
+            tensor = tensor.contiguous().cuda()
+
+            named_tensors = [(name, tensor)]
+            serialized_handler = MultiprocessingSerializer.serialize(
+                named_tensors, output_str=True
+            )
+            # output_str=True ensures the return type is str
+            serialized_handler_str = cast(str, serialized_handler)
+
+            gathered_handlers = _gather_ipc_handlers(
+                serialized_handler_str,
+                ipc_gather_group,
+                ipc_gather_src,
+                rank,
+                matching_ranks,
+            )
+
+            if rank == ipc_gather_src and gathered_handlers is not None:
+                _send_tensor_to_sglang(
+                    url,
+                    name,
+                    gathered_handlers,
+                    tensor.shape,
+                    str(tensor.dtype),
+                    flush_cache=False,
+                )
+                tensor_count += 1
+
+            del tensor, serialized_handler
+            if rank == ipc_gather_src:
+                del gathered_handlers
+            torch.cuda.empty_cache()
+
+        if rank == ipc_gather_src:
+            print(
+                f"[sglang refit details] {worker_name}: Weight update completed - "
+                f"Successfully updated {tensor_count}/{total_tensors} parameters to SGLang server: {base_url}",
+                flush=True,
+            )
+            if tensor_count != total_tensors:
+                print(
+                    f"[sglang refit details] {worker_name}: WARNING - Expected {total_tensors} tensors, "
+                    f"but only sent {tensor_count}",
+                    flush=True,
+                )
+
+    except Exception as e:
+        print(
+            f"{worker_name} (rank {rank}): Error during HTTP weight streaming: {e}.\n"
+            f"{traceback.format_exc()}"
+        )
+        raise
+
+    finally:
+        gc.collect()
+        torch.cuda.empty_cache()
+
+
+def _setup_ipc_gather_group(
+    rank: int,
+    current_device_uuid: str,
+    sglang_gpu_uuids: list[str],
+    sglang_url_to_gpu_uuids: dict[str, list[str]],
+) -> tuple[Optional[dist.ProcessGroup], Optional[int], Optional[list[int]]]:
+    """Setup gather configuration for IPC handlers.
+
+    Returns:
+        Tuple of (gather_group, gather_src_rank, matching_ranks)
+        - gather_group: None (use default FSDP group)
+        - gather_src_rank: The rank that will collect and send to SGLang server
+        - matching_ranks: List of ranks that belong to the same SGLang server
+    """
+    if not dist.is_initialized():
+        return None, None, None
+
+    world_size = dist.get_world_size()
+    my_rank = dist.get_rank()
+
+    all_ranks_uuids = [None] * world_size
+    dist.all_gather_object(all_ranks_uuids, current_device_uuid)
+
+    matching_ranks = [
+        r for r, uuid in enumerate(all_ranks_uuids) if uuid in sglang_gpu_uuids
+    ]
+
+    if len(matching_ranks) == 0:
+        return None, None, None
+
+    matching_ranks = sorted(matching_ranks)
+    gather_src = matching_ranks[0]
+
+    return None, gather_src, matching_ranks
+
+
+def _gather_ipc_handlers(
+    serialized_handler: str,
+    gather_group: Optional[dist.ProcessGroup],
+    gather_src: Optional[int],
+    rank: int,
+    matching_ranks: Optional[list[int]] = None,
+) -> Optional[list[str]]:
+    """Gather IPC handlers from all ranks in the default FSDP group, then filter by server.
+
+    Args:
+        serialized_handler: Serialized IPC handler from this rank
+        gather_group: Process group (None means use default FSDP group)
+        gather_src: Rank that will collect and filter handlers
+        rank: Current rank
+        matching_ranks: List of ranks that belong to the same SGLang server
+
+    Returns:
+        List of serialized handlers in rank order (only on gather_src rank), None otherwise
+        The list contains handlers from matching_ranks only, in rank order
+    """
+    if gather_src is None:
+        return None
+
+    if not dist.is_initialized():
+        return None
+
+    world_size = dist.get_world_size()
+
+    all_handlers: list[Optional[str]] = [None for _ in range(world_size)]
+    dist.all_gather_object(all_handlers, serialized_handler)
+    all_handlers_str = cast(list[str], all_handlers)
+
+    if rank == gather_src and matching_ranks is not None:
+        filtered_handlers: list[str] = [all_handlers_str[r] for r in matching_ranks]
+        return filtered_handlers
+    else:
+        return None
+
+
+def _send_tensor_to_sglang(
+    url: str,
+    tensor_name: str,
+    gathered_handlers: list[str],
+    shape: torch.Size,
+    dtype: str,
+    flush_cache: bool = False,
+) -> None:
+    """Send gathered IPC handlers to SGLang server via HTTP.
+
+    Key: gathered_handlers are in rank order [rank0, rank1, ...]
+    SGLang will automatically match: handler = serialized_handlers[tp_rank]
+
+    Args:
+        url: SGLang server URL
+        tensor_name: Name of the tensor
+        gathered_handlers: List of serialized IPC handlers in rank order
+        shape: Tensor shape
+        dtype: Tensor dtype
+        flush_cache: Whether to flush cache after this tensor (for last tensor)
+    """
+    payload = {
+        "serialized_named_tensors": gathered_handlers,
+        "flush_cache": flush_cache,
+    }
+
+    try:
+        response = requests.post(
+            url,
+            json=payload,
+            headers={"Content-Type": "application/json"},
+            timeout=120,
+        )
+        response.raise_for_status()
+    except requests.exceptions.HTTPError as e:
+        error_msg = f"Failed to send tensor '{tensor_name}' to {url}: {e}"
+        try:
+            error_detail = response.text
+            error_msg += f"\nResponse status: {response.status_code}"
+            error_msg += f"\nResponse body: {error_detail[:500]}"
+        except:
+            pass
+        print(f"[sglang refit] {error_msg}", flush=True)
+        raise RuntimeError(error_msg) from e
+    except Exception as e:
+        raise RuntimeError(
+            f"Failed to send tensor '{tensor_name}' to {url}: {e}"
+        ) from e
diff --git a/nemo_rl/models/policy/workers/dtensor_policy_worker_v2.py b/nemo_rl/models/policy/workers/dtensor_policy_worker_v2.py
index 49e1360c57..8102c583d9 100644
--- a/nemo_rl/models/policy/workers/dtensor_policy_worker_v2.py
+++ b/nemo_rl/models/policy/workers/dtensor_policy_worker_v2.py
@@ -15,55 +15,30 @@
 import contextlib
 import gc
 import itertools
-import os
 import warnings
 from collections import defaultdict
 from contextlib import AbstractContextManager, contextmanager, nullcontext
-from typing import Any, Generator, Optional, cast
+from typing import Any, Generator, Optional
 
 import ray
 import torch
-from accelerate import init_empty_weights
-from hydra.utils import get_class
-from nemo_automodel import (
-    NeMoAutoModelForSequenceClassification,
-)
-from nemo_automodel._transformers.registry import ModelRegistry
-from nemo_automodel.components._peft.lora import (
-    PeftConfig,
-    apply_lora_to_linear_modules,
-)
-from nemo_automodel.components.config.loader import _resolve_target
 from nemo_automodel.components.distributed.cp_utils import (
     create_context_parallel_ctx,
 )
 from nemo_automodel.components.distributed.cp_utils import (
     get_train_context as get_train_context_automodel,
 )
-from nemo_automodel.components.distributed.fsdp2 import (
-    FSDP2Manager,
-)
 from nemo_automodel.components.distributed.tensor_utils import (
     get_cpu_state_dict,
     to_local_if_dtensor,
 )
-from nemo_automodel.components.moe.parallelizer import (
-    parallelize_model as moe_parallelize_model,
-)
 from nemo_automodel.components.training.utils import scale_grads_and_clip_grad_norm
 from torch import nn
-from torch.distributed.fsdp import (
-    CPUOffloadPolicy,
-    MixedPrecisionPolicy,
-)
 from torch.distributed.tensor import DTensor, Shard
 from transformers import (
-    AutoConfig,
     AutoProcessor,
     AutoTokenizer,
-    PreTrainedModel,
 )
-from transformers.models.gemma3.modeling_gemma3 import Gemma3ForCausalLM
 
 from nemo_rl.algorithms.interfaces import LossFunction, LossType
 from nemo_rl.algorithms.loss_functions import SequencePackingLossWrapper
@@ -73,6 +48,12 @@
     distributed_vocab_topk,
     get_logprobs_from_vocab_parallel_logits,
 )
+from nemo_rl.models.automodel.setup import (
+    setup_distributed,
+    setup_model_and_optimizer,
+    setup_reference_model_state,
+    validate_and_prepare_config,
+)
 from nemo_rl.models.huggingface.common import (
     get_flash_attention_kwargs,
     pack_sequences,
@@ -84,9 +65,7 @@
     ScoreOutputSpec,
 )
 from nemo_rl.models.policy.utils import (
-    configure_dynamo_cache,
     get_runtime_env_for_policy_worker,
-    resolve_model_class,
 )
 from nemo_rl.models.policy.workers.base_policy_worker import AbstractPolicyWorker
 from nemo_rl.models.policy.workers.patches import (
@@ -98,12 +77,6 @@
 from nemo_rl.utils.nsys import wrap_with_nvtx_name
 from nemo_rl.utils.packed_tensor import packed_broadcast_producer
 
-STRING_TO_DTYPE = {
-    "float32": torch.float32,
-    "bfloat16": torch.bfloat16,
-    "float16": torch.float16,
-}
-
 
 def dtensor_params_generator(
     model: nn.Module, target_dtype: torch.dtype
@@ -202,430 +175,109 @@ def __init__(
         # Apply patch to work around 'NotImplementedError: Operator aten.alias.default does not have a sharding strategy registered'
         apply_torch_aten_alias_tensor_patch()
 
+        # Store configuration and tokenizer/processor
+        self.cfg = config
         self.tokenizer = tokenizer
         self.processor = processor
         self.is_vlm = processor is not None
+        self.lora_enabled = (
+            config["dtensor_cfg"].get("lora_cfg", {}).get("enabled", False)
+        )
 
         print(f"Initializing DTensorPolicyWorkerV2 with is_vlm={self.is_vlm}")
 
-        self.is_generation_colocated = None
-        if "generation" in config and config["generation"] is not None:
-            self.is_generation_colocated = config["generation"]["colocated"]["enabled"]
-
-        # Explicitly set NCCL_CUMEM_ENABLE to 1 to avoid the P2P initialization error for PyNCCLCommunicator.
-        # See https://github.com/NVIDIA-NeMo/RL/issues/564 for more details.
-        if not self.is_generation_colocated:
-            os.environ["NCCL_CUMEM_ENABLE"] = "1"
-
-        # Disable dynamo autotune_local_cache to avoid crash when there's already a cache
-        # with different order of node_bundles
-        configure_dynamo_cache()
-
-        self.cfg = config
-        self.cpu_offload = self.cfg["dtensor_cfg"]["cpu_offload"]
-        # torch distributed init. Envars for rank, world_size, and master_addr and master_port are set from the ray remote call
-        backend = "nccl" if not self.cpu_offload else "cuda:nccl,cpu:gloo"
-        torch.distributed.init_process_group(backend=backend)
-        self.rank = torch.distributed.get_rank()
-        world_size = torch.distributed.get_world_size()
-        model_name = self.cfg["model_name"]
-
+        # Initialize checkpoint manager
         self.checkpoint_manager: Optional[AutomodelCheckpointManager] = None
 
-        self.cpu_offload = self.cfg["dtensor_cfg"]["cpu_offload"]
-        self.offload_optimizer_for_logprob = self.cfg["offload_optimizer_for_logprob"]
-        self.max_grad_norm = self.cfg["max_grad_norm"]
-
-        try:
-            self.dtype = STRING_TO_DTYPE[self.cfg["precision"]]
-        except KeyError:
-            raise ValueError(f"Unknown precision: {self.cfg['precision']}")
-
-        self.enable_seq_packing = self.cfg["sequence_packing"]["enabled"]
-        if self.enable_seq_packing:
-            assert not self.is_vlm, (
-                "Sequence packing is not supported for VLM models. Please set policy.sequence_packing.enabled = False to train VLM models."
-            )
-            print(
-                f"[Rank {self.rank}] Sequence packing is enabled for model {model_name}"
-            )
-            print(f"[Rank {self.rank}] Using FlashAttention2 for sequence packing")
-
-        hf_config_overrides = self.cfg.get("hf_config_overrides", {}) or {}
-
-        # Choose attention implementation on the following basis:
-        # - Packed sequence requires FA2 and CP must be 1
-        # - CP > 1 requires SDPA
-        cp_size_cfg = self.cfg["dtensor_cfg"]["context_parallel_size"]
-
-        # NeMoAutoModelForCausalLM uses flash_attention_2 by default
-        # so we need to set it to None if sequence packing is disabled
-        # https://github.com/NVIDIA-NeMo/Automodel/blob/7e748be260651349307862426c0c168cebdeeec3/nemo_automodel/components/_transformers/auto_model.py#L180
-        attn_impl = (
-            "flash_attention_2"
-            if (self.enable_seq_packing and cp_size_cfg == 1)
-            else ("sdpa" if cp_size_cfg > 1 else None)
-        )
-
-        model_config = AutoConfig.from_pretrained(
-            model_name,
-            # Always load the model in float32 to keep master weights in float32.
-            # Keeping the master weights in lower precision has shown to cause issues with convergence.
-            torch_dtype=torch.float32,
-            trust_remote_code=True,
-            attn_implementation="flash_attention_2"
-            if self.enable_seq_packing
-            else None,
-            **hf_config_overrides,
+        # Validate configuration and prepare runtime settings
+        runtime_config = validate_and_prepare_config(
+            config=config,
+            processor=self.processor,
+            rank=0,  # Temporary, will be updated after distributed init
         )
 
-        self.allow_flash_attn_args = self.check_model_allow_flash_attn_args(
-            model_config
+        # Set up distributed environment (returns FSDP2Manager)
+        distributed_manager = setup_distributed(
+            config=config,
+            runtime_config=runtime_config,
         )
-
-        self._is_reward_model = (
-            "reward_model_cfg" in self.cfg and self.cfg["reward_model_cfg"]["enabled"]
-        )
-        if self._is_reward_model:
-            # Ensure sequence packing is disabled.
-            if self.enable_seq_packing:
-                raise NotImplementedError(
-                    "Sequence packing is not supported for reward models"
-                )
-            # Load model as a Reward Model.
-            rm_type = self.cfg["reward_model_cfg"]["reward_model_type"]
-            if rm_type == "bradley_terry":
-                model_class = NeMoAutoModelForSequenceClassification
-                if model_config.num_labels != 1:
-                    # For Bradley-Terry reward models, the linear head has a single output.
-                    # In the transformers library, the default setting for model_config.num_labels is 2
-                    # (https://github.com/huggingface/transformers/blob/v4.52.4/src/transformers/configuration_utils.py#L259).
-                    # Since num_labels is used as the out_features for the linear head
-                    # (https://github.com/huggingface/transformers/blob/v4.52.4/src/transformers/models/llama/modeling_llama.py#L738)
-                    # if num_labels is not 1, we set it to 1. This change may trigger a warning that some weights are not initialized
-                    # from the model checkpoint and are instead initialized using model_config.initializer_range
-                    # (https://github.com/huggingface/transformers/blob/v4.52.4/src/transformers/models/llama/configuration_llama.py#L62).
-                    print(
-                        "model_config.num_labels is not 1. Setting it to 1 since this value is used as the out_features "
-                        "for the linear head of Bradley-Terry reward models."
-                    )
-                    model_config.num_labels = 1
-            else:
-                raise ValueError(f"Unknown reward model type: {rm_type}")
-        else:
-            # DO NOT assume AutoModelForCausalLM, multimodal models can inherit from AutoModelForImageTextToText, AutoModelForTextToWaveform, etc.
-            model_class = resolve_model_class(model_config.model_type)
-
-        # lora config
-        lora_cfg = self.cfg["dtensor_cfg"].get("lora_cfg", None)
-        self.peft_config = None
-        self.lora_enabled = lora_cfg is not None and lora_cfg["enabled"]
-        if self.lora_enabled:
-            if self.cfg["dtensor_cfg"]["tensor_parallel_size"] > 1:
-                assert not lora_cfg["use_triton"], (
-                    "Triton is not supported when tensor_parallel_size > 1"
-                )
-            # Always use float32 since FSDP requires all parameters to be in the same dtype.
-            # autocast should cast the weights to the correct dtype during the forward pass.
-            cfg_dict_with_dtype = {**lora_cfg, "lora_dtype": "torch.float32"}
-            self.peft_config = PeftConfig.from_dict(cfg_dict_with_dtype)
-
-        print(f"[Rank {self.rank}] Initializing empty model for FSDP...")
-        # All ranks initialize model on meta device, so FSDP can shard it.
-        # The actual weights will be broadcast from rank 0.
-
-        cp_size = self.cfg["dtensor_cfg"]["context_parallel_size"]
-        automodel_kwargs = self.cfg["dtensor_cfg"].get("automodel_kwargs", {})
-        if automodel_kwargs.get("backend", None) is not None:
-            backend_class = _resolve_target(
-                automodel_kwargs.get("backend", None)["_target_"]
-            )
-            backend_kwargs = automodel_kwargs.get("backend")
-            backend_kwargs.pop("_target_")
-            backend = backend_class(
-                **backend_kwargs,
-            )
-            automodel_kwargs["backend"] = backend
-
-        if "use_liger_kernel" not in automodel_kwargs:
-            automodel_kwargs["use_liger_kernel"] = False
-
-        with init_empty_weights():
-            from torch.nn.attention import SDPBackend
-
-            if cp_size > 1:
-                # Match Automodel's `get_train_context` in `cp_utils.py` where only
-                # flash and efficient backends are supported
-                # Ref: https://github.com/NVIDIA-NeMo/Automodel/blob/81788d6f4848f5f066c4a6a2bece4689a6a83687/nemo_automodel/components/distributed/cp_utils.py#L57
-                sdpa_method = [
-                    SDPBackend.FLASH_ATTENTION,
-                    SDPBackend.EFFICIENT_ATTENTION,
-                ]
-            elif self.cfg["dtensor_cfg"]["activation_checkpointing"]:
-                # For activation checkpointing, we must disable the cudnn SDPA backend because
-                # it may not be selected during recomputation.
-                # In that case, we will get the following error:
-                # "Recomputed values have different metadata than during forward pass."
-                sdpa_method = [
-                    SDPBackend.FLASH_ATTENTION,
-                    SDPBackend.EFFICIENT_ATTENTION,
-                    SDPBackend.MATH,
-                ]
-            else:
-                sdpa_method = None
-
-            self.model = model_class.from_pretrained(
-                model_name,
-                attn_implementation=attn_impl,
-                torch_dtype=str(model_config.torch_dtype),
-                trust_remote_code=True,
-                config=model_config,
-                sdpa_method=sdpa_method,
-                **automodel_kwargs,
-            )
-            if self.lora_enabled:
-                apply_lora_to_linear_modules(self.model, self.peft_config)
-
-        # For activation checkpointing, we also must globally disable the cudnn SDPA backend
-        # to ensure that cudnn does not get selected during recomputation.
-        if self.cfg["dtensor_cfg"]["activation_checkpointing"]:
-            from torch.backends import cuda
-
-            cuda.enable_cudnn_sdp(False)
-
-        # Hold a copy of model state_dict keys before any parallelization
-        self.model_state_dict_keys = list(self.model.state_dict().keys())
-
-        if self.model.config.pad_token_id is None:
-            self.model.config.pad_token_id = tokenizer.pad_token_id
-
-        tp_size = self.cfg["dtensor_cfg"]["tensor_parallel_size"]
-        ep_size = self.cfg["dtensor_cfg"].get("expert_parallel_size", 1)
-        dp_size = None  # will be inferred
-        if cp_size > 1 and self.enable_seq_packing:
-            raise ValueError(
-                "Context parallel is not supported for sequence packing. Refer to https://github.com/NVIDIA/NeMo-RL/blob/main/docs/model-quirks.md#context-parallel-with-fsdp2 for more details."
-            )
-        sequence_parallel_enabled = self.cfg["dtensor_cfg"]["sequence_parallel"]
-
-        if sequence_parallel_enabled and tp_size == 1:
-            print(
-                "[WARNING]: sequence_parallel=True, but tp_size=1 which has no effect. Enable tp_size > 1 to use sequence parallelism."
-            )
-
-        if cp_size > 1:
-            assert not isinstance(self.model, Gemma3ForCausalLM), (
-                "Context parallel is not supported for Gemma3ForCausalLM. Torch context parallel has many limitations. "
-                "Please refer to https://github.com/NVIDIA/NeMo-RL/blob/main/docs/model-quirks.md#context-parallel-with-fsdp2 for more details."
-            )
-
-            assert not (tp_size > 1 and sequence_parallel_enabled), (
-                "It's a known issue that context parallel can't be used together with sequence parallel in DTensor worker. "
-                "Please either set cp_size = 1 or disable sequence parallel. "
-                "See https://github.com/NVIDIA-NeMo/RL/issues/659 for more details."
-            )
-
-            assert not self.is_vlm, (
-                "Context parallel is yet not supported for VLM models. Please set cp_size = 1 to train VLM models."
-            )
-
-        # ------------------------------------------------
-        # Build device mesh and parallelize
-        # ------------------------------------------------
-        manager = FSDP2Manager(
-            dp_size=dp_size,
-            dp_replicate_size=1,
-            tp_size=tp_size,
-            cp_size=cp_size,
-            ep_size=ep_size,
-            pp_size=1,
-            sequence_parallel=sequence_parallel_enabled,
-            use_hf_tp_plan=self.cfg["dtensor_cfg"].get("use_hf_tp_plan", False),
-            mp_policy=MixedPrecisionPolicy(
-                param_dtype=self.dtype,
-                reduce_dtype=torch.float32,
-                output_dtype=torch.float32,
-            ),
-            offload_policy=CPUOffloadPolicy(pin_memory=False)
-            if self.cpu_offload
-            else None,
-            backend="nccl",
-            world_size=world_size,
-            activation_checkpointing=self.cfg["dtensor_cfg"][
-                "activation_checkpointing"
-            ],
-            custom_tp_plan=self.cfg["dtensor_cfg"].get("custom_parallel_plan", None),
-        )
-
-        # Force setup distributed for world size 1 as FSDP2Manager skips it.
-        if world_size == 1:
-            manager._setup_distributed()
-
-        # Store mesh references for downstream usage
-        self.device_mesh = manager.device_mesh
+        # Set instance attributes from the distributed manager and its device mesh
+        self.rank = torch.distributed.get_rank()
+        self.device_mesh = distributed_manager.device_mesh
         self.dp_cp_mesh = self.device_mesh["dp_cp"]
         self.dp_mesh = self.device_mesh["dp"]
         self.tp_mesh = self.device_mesh["tp"]
         self.cp_mesh = self.device_mesh["cp"]
-        self.moe_mesh = getattr(manager, "moe_mesh", None)
+        self.moe_mesh = distributed_manager.moe_mesh
+        self.dp_size = distributed_manager.dp_size
+        self.tp_size = distributed_manager.tp_size
+        self.cp_size = distributed_manager.cp_size
 
-        self.dp_size = manager.dp_size
-        self.tp_size = manager.tp_size
-        self.cp_size = manager.cp_size
-
-        # Parallelize model
-        self.is_moe_model = any(["expert" in key for key in self.model_state_dict_keys])
-        self.is_hf_model = (
-            model_config.architectures[0] not in ModelRegistry.model_arch_name_to_cls
-        )
-        # Autocast is disabled for custom MoE models (non-HF) to avoid numerical issues
-        self.autocast_enabled = not (self.is_moe_model and not self.is_hf_model)
-        if (
-            not isinstance(self.model, PreTrainedModel)
-            and self.is_moe_model
-            and not self.is_hf_model
-        ):
-            assert self.tp_size == 1, (
-                "Using custom implementation {self.model.__class__.__name__} for MoE model {model_name} which doesn't support tp_size > 1. Please use expert_parallel_size > 1 for custom implementation or set force_hf=True in your config at policy->dtensor_cfg->automodel_kwargs to use the HuggingFace implementation."
-            )
-            assert self.cp_size == 1, (
-                "Using custom implementation {self.model.__class__.__name__} for MoE model {model_name} which doesn't support cp_size > 1. Please set force_hf=True in your config at policy->dtensor_cfg->automodel_kwargs to use the HuggingFace implementation."
-            )
-            moe_parallelize_model(
-                model=self.model,
-                world_mesh=self.device_mesh,
-                moe_mesh=self.moe_mesh,
-                pp_enabled=False,
-                dp_axis_names=(
-                    ("dp_replicate", "dp_shard_cp")
-                    if "dp_replicate" in self.device_mesh.mesh_dim_names
-                    and "dp_shard_cp" in self.device_mesh.mesh_dim_names
-                    else ("dp_shard_cp",)
-                ),
-                cp_axis_name="cp",
-                tp_axis_name="tp",
-                ep_axis_name="ep",
-                ep_shard_axis_names=("ep_shard",),
-            )
-        else:
-            self.model = manager.parallelize(self.model)
-
-        # Load base model weights across all ranks using Automodel Checkpointer
-        # This mirrors build_model_and_optimizer's is_meta_device + load_weights path
-        print(self.model)
+        # Initialize checkpoint manager now that distributed is set up
         self._init_checkpoint_manager(
             config_updates={
-                "model_repo_id": model_name,
-                "dequantize_base_checkpoint": self.cfg.get(
+                "model_repo_id": config["model_name"],
+                "dequantize_base_checkpoint": config.get(
                     "dequantize_base_checkpoint", False
                 ),
                 "is_peft": self.lora_enabled,
             },
         )
-        self.checkpoint_manager.set_model_state_dict_keys(self.model_state_dict_keys)
 
-        # Load base HF weights unless an explicit checkpoint is provided later
-        # This puts shards directly into the parallelized model
-        self.checkpoint_manager.load_base_model(
-            self.model,
-            model_name=model_name,
-            hf_cache_dir=hf_config_overrides.get("cache_dir", None),
-            dequantize_base_checkpoint=self.cfg.get(
-                "dequantize_base_checkpoint", False
-            ),
-            peft_init_method=self.peft_config.lora_A_init
-            if self.peft_config is not None
-            else None,
-        )
-
-        # Handle tied word embeddings after loading the state dict
-        # We need to actually tie the parameters at the model level
-        is_tied_lm_head = hasattr(self.model, "lm_head") and getattr(
-            getattr(self.model, "config", {}), "tie_word_embeddings", False
+        # Set up model and optimizer
+        model_and_optimizer_state = setup_model_and_optimizer(
+            config=config,
+            tokenizer=tokenizer,
+            runtime_config=runtime_config,
+            distributed_manager=distributed_manager,
+            checkpoint_manager=self.checkpoint_manager,
+            is_vlm=self.is_vlm,
+            init_optimizer=init_optimizer,
+            weights_path=weights_path,
+            optimizer_path=optimizer_path,
         )
-        if is_tied_lm_head:
-            embed_tokens_weight = None
-            for name, param in self.model.named_parameters():
-                if "embed_tokens" in name and name.endswith(".weight"):
-                    embed_tokens_weight = param
-                    break
-
-            if embed_tokens_weight is not None:
-                self.model.lm_head.weight = embed_tokens_weight
-
-        if self.cpu_offload:
-            self.model = self.move_to_device(self.model, "cpu")
 
+        # Set instance attributes from model and optimizer state (tuple unpacking)
+        (
+            self.model,
+            self.model_state_dict_keys,
+            self.optimizer,
+            self.scheduler,
+            self.is_hf_model,
+            self.is_moe_model,
+            self._is_reward_model,  # Note: using underscore prefix for internal naming
+            self.model_class,
+            self.model_config,
+            self.peft_config,
+            self.autocast_enabled,
+        ) = model_and_optimizer_state
+
+        # Initialize reference model if requested
+        self.reference_model_state_dict = None
         if init_reference_model:
-            self.reference_model_state_dict = get_cpu_state_dict(
-                self.model.state_dict().items(), pin_memory=True
-            )
-
-        if init_optimizer:
-            optimizer_cls = get_class(self.cfg["optimizer"]["name"])
-            self.optimizer = optimizer_cls(
-                self.model.parameters(),
-                **self.cfg["optimizer"]["kwargs"],
-            )
-        else:
-            self.optimizer = None
-
-        if "scheduler" in self.cfg and self.optimizer is not None:
-            if isinstance(self.cfg["scheduler"], dict):
-                scheduler_cls = get_class(cast(str, self.cfg["scheduler"]["name"]))
-                self.scheduler = scheduler_cls(
-                    self.optimizer, **self.cfg["scheduler"]["kwargs"]
-                )
-            else:
-                schedulers = []
-                for scheduler_cfg in self.cfg["scheduler"]:
-                    if "name" in scheduler_cfg:
-                        schedulers.append(
-                            get_class(scheduler_cfg["name"])(
-                                self.optimizer, **scheduler_cfg["kwargs"]
-                            )
-                        )
-                    else:
-                        assert "milestones" in scheduler_cfg, (
-                            "unknown scheduler config: ",
-                            scheduler_cfg,
-                        )
-                        milestones: list[int] = scheduler_cfg["milestones"]
-
-                self.scheduler = torch.optim.lr_scheduler.SequentialLR(
-                    self.optimizer, schedulers, milestones
-                )
-
-        elif self.optimizer is not None:
-            ## default to a passthrough LR schedule
-            self.scheduler = torch.optim.lr_scheduler.LambdaLR(
-                self.optimizer, lr_lambda=lambda epoch: 1
-            )
-
-        # restore
-        if weights_path:
-            self.load_checkpoint(weights_path, optimizer_path)
-        else:
-            print(
-                "No weights path provided. Loaded base HF weights via Checkpointer (default policy init)"
-            )
+            self.reference_model_state_dict = setup_reference_model_state(self.model)
+
+        # Set instance attributes from runtime config (tuple unpacking)
+        (
+            self.model_class,  # Already set above; included in the tuple for completeness
+            self.model_config,  # Already set above; included in the tuple for completeness
+            self.hf_config_overrides,
+            self.allow_flash_attn_args,
+            self.attn_impl,
+            self.dtype,
+            self.enable_seq_packing,
+            self.max_grad_norm,
+            self.cpu_offload,
+            self.offload_optimizer_for_logprob,
+            self.is_generation_colocated,
+            _runtime_is_reward_model,  # Duplicate, already set as _is_reward_model
+        ) = runtime_config
 
     def _apply_temperature_scaling(self, logits: torch.Tensor) -> torch.Tensor:
         if "generation" in self.cfg and self.cfg["generation"] is not None:
             logits.div_(self.cfg["generation"]["temperature"])
         return logits
 
-    def check_model_allow_flash_attn_args(self, model_config) -> bool:
-        # Some models doesn't support flash_attn_kwargs
-        # Check nemotron nas.
-        if (
-            model_config.architectures[0] == "DeciLMForCausalLM"
-            and model_config.model_type == "nemotron-nas"
-        ):
-            return False
-
-        return True
-
     @wrap_with_nvtx_name("dtensor_policy_worker_v2/train")
     def train(
         self,
@@ -1832,6 +1484,53 @@ def stream_weights_via_ipc_zmq(
             worker_name=str(self),
         )
 
+    @torch.no_grad()
+    @wrap_with_nvtx_name("dtensor_policy_worker_v2/stream_weights_via_http")
+    def stream_weights_via_http(
+        self,
+        sglang_url_to_gpu_uuids: dict[str, list[str]],
+    ) -> None:
+        """Push the current model weights to SGLang servers over their HTTP API.
+
+        State dict entries are cast to ``self.dtype`` before being streamed.
+
+        Args:
+            sglang_url_to_gpu_uuids: Maps each SGLang server URL to the GPU UUIDs it uses.
+        """
+        # With CPU offload enabled the model has to be moved to CUDA by hand first.
+        if self.cpu_offload:
+            self.model = self.move_to_cuda(self.model)
+
+        from nemo_rl.models.policy.utils import stream_weights_via_http_impl
+
+        # Device id reported by this worker, forwarded as the current GPU UUID.
+        device_uuid = self.report_device_id()
+
+        def dtensor_params_generator():
+            """Yield (name, tensor) pairs in name order, gathering DTensors first."""
+            named_tensors = sorted(
+                self.model.state_dict().items(), key=lambda item: item[0]
+            )
+            for param_name, param in named_tensors:
+                # A DTensor is materialized as a full tensor before streaming;
+                # any other tensor is streamed as-is.
+                if isinstance(param, DTensor):
+                    param = param.full_tensor()
+                # Cast to the target dtype and make the memory contiguous.
+                yield (
+                    param_name,
+                    param.to(self.dtype, non_blocking=True).contiguous(),
+                )
+
+        # Delegate the actual transfer to the shared HTTP implementation.
+        stream_weights_via_http_impl(
+            params_generator=dtensor_params_generator(),
+            sglang_url_to_gpu_uuids=sglang_url_to_gpu_uuids,
+            rank=self.rank,
+            worker_name=str(self),
+            current_device_uuid=device_uuid,
+        )
+
     @torch.no_grad()
     def broadcast_weights_for_collective(
         self, kv_scales: Optional[dict[str, float]] = None
diff --git a/nemo_rl/models/policy/workers/megatron_policy_worker.py b/nemo_rl/models/policy/workers/megatron_policy_worker.py
index 1d175f35b2..25a410c4ae 100644
--- a/nemo_rl/models/policy/workers/megatron_policy_worker.py
+++ b/nemo_rl/models/policy/workers/megatron_policy_worker.py
@@ -19,49 +19,19 @@
 from collections import defaultdict
 from contextlib import AbstractContextManager, contextmanager, nullcontext
 from functools import partial
-from typing import Any, Iterator, Optional, TypedDict, TypeVar, cast
+from typing import Any, Iterator, Optional, TypeVar, cast
 
 import ray
 import torch
-from megatron.bridge import AutoBridge
-from megatron.bridge.models.model_provider import get_model
-from megatron.bridge.peft.lora import LoRA
-from megatron.bridge.training import fault_tolerance
 from megatron.bridge.training.checkpointing import (
-    checkpoint_exists,
-    init_checkpointing_context,
-    load_checkpoint,
     maybe_finalize_async_save,
     save_checkpoint,
 )
-from megatron.bridge.training.config import (
-    CheckpointConfig,
-    ConfigContainer,
-    DistributedDataParallelConfig,
-    LoggerConfig,
-    OptimizerConfig,
-    SchedulerConfig,
-    TokenizerConfig,
-    TrainingConfig,
-)
-from megatron.bridge.training.initialize import (
-    initialize_megatron,
-    set_jit_fusion_options,
-)
-from megatron.bridge.training.optim import setup_optimizer
-from megatron.bridge.training.setup import (
-    _create_peft_pre_wrap_hook,
-    _update_model_config_funcs,
-)
-from megatron.bridge.training.state import GlobalState
-from megatron.bridge.training.tokenizers.tokenizer import build_tokenizer
 from megatron.bridge.training.utils.train_utils import (
     logical_and_across_model_parallel_group,
     reduce_max_stat_across_model_parallel_group,
 )
 from megatron.bridge.utils.common_utils import get_rank_safe
-from megatron.bridge.utils.instantiate_utils import InstantiationMode
-from megatron.bridge.utils.vocab_utils import calculate_padded_vocab_size
 from megatron.core import parallel_state
 from megatron.core.distributed import DistributedDataParallel
 from megatron.core.distributed.fsdp.mcore_fsdp_adapter import (
@@ -86,12 +56,9 @@
     is_pipeline_last_stage,
 )
 from megatron.core.pipeline_parallel import get_forward_backward_func
+from megatron.core.process_groups_config import ProcessGroupCollection
 from megatron.core.rerun_state_machine import get_rerun_state_machine
-from megatron.core.transformer import MegatronModule
-from megatron.core.transformer.module import Float16Module
-from megatron.core.transformer.transformer_config import TransformerConfig
 from megatron.training.utils import get_ltor_masks_and_position_ids
-from ray.util.queue import Queue
 from transformers import PreTrainedTokenizerBase
 
 from nemo_rl.algorithms.interfaces import LossFunction, LossType
@@ -116,55 +83,30 @@
     forward_step_arbitrary_loss,
     get_moe_metrics,
 )
-from nemo_rl.models.megatron.community_import import import_model_from_hf_name
+from nemo_rl.models.megatron.config import MegatronGenerationConfig
+from nemo_rl.models.megatron.setup import (
+    finalize_megatron_setup,
+    handle_model_import,
+    setup_distributed,
+    setup_model_and_optimizer,
+    setup_reference_model_state,
+    validate_and_set_config,
+    validate_model_paths,
+)
 from nemo_rl.models.policy import PolicyConfig
 from nemo_rl.models.policy.interfaces import (
     ColocatablePolicyInterface,
     LogprobOutputSpec,
 )
-from nemo_rl.models.policy.utils import (
-    configure_dynamo_cache,
-    get_megatron_checkpoint_dir,
-    get_runtime_env_for_policy_worker,
-)
+from nemo_rl.models.policy.utils import get_runtime_env_for_policy_worker
 from nemo_rl.models.policy.workers.base_policy_worker import AbstractPolicyWorker
 from nemo_rl.models.policy.workers.patches import apply_transformer_engine_patch
 from nemo_rl.utils.nsys import wrap_with_nvtx_name
 from nemo_rl.utils.packed_tensor import packed_broadcast_producer
 
-try:
-    from megatron.core.distributed import (
-        TorchFullyShardedDataParallel as torch_FSDP,  # noqa: F401 unused-import
-    )
-
-    HAVE_FSDP2 = True
-except ImportError:
-    HAVE_FSDP2 = False
-
 TokenizerType = TypeVar("TokenizerType", bound=PreTrainedTokenizerBase)
 
 
-class MegatronGenerationConfig(TypedDict):
-    # Total GPU memory (in GB) allocated for KV cache buffers
-    buffer_size_gb: int
-    # Fraction of buffer reserved for guaranteed active requests
-    buffer_guaranteed_fraction: float
-    # Number of CUDA graphs to pre-compile for different batch sizes
-    num_cuda_graphs: int
-    # Size of each KV cache block in tokens (affects memory granularity)
-    block_size_tokens: int
-    # Enable CUDA graphs for prefill/context processing
-    use_cuda_graphs_for_non_decode_steps: bool
-    # Split long prefills into chunks for better memory management
-    enable_chunked_prefill: bool
-    # Unified memory usage level (0=disabled, higher values enable more aggressive paging)
-    unified_memory_level: int
-    # Maximum number of tokens to use in a single step. Analogous to vllm's max_num_batched_tokens.
-    # Can cause OOM if set too high so should be tuned with buffer_size_gb if OOMing. If set too
-    # low, then will only do 512 tokens at a time, which can be slow.
-    max_tokens: int
-
-
 def broadcast_object_across_pp_ranks(obj):
     """Broadcast an object across pipeline parallel ranks.
 
@@ -218,273 +160,6 @@ def broadcast_object_across_pp_ranks(obj):
     return obj_list[0]
 
 
-def setup_megatron_model(
-    policy_cfg: PolicyConfig,
-    cfg: ConfigContainer,
-    load_optimizer: bool = True,
-    get_embedding_ranks=None,  # TODO @sahilj: What is this?
-    get_position_embedding_ranks=None,
-):
-    state = GlobalState()
-    state.cfg = cfg
-    # TODO: Freeze state.cfg
-
-    cfg.dist.external_gpu_device_mapping = True
-    initialize_megatron(
-        cfg=cfg,
-        get_embedding_ranks=get_embedding_ranks,
-        get_position_embedding_ranks=get_position_embedding_ranks,
-    )
-
-    if cfg.ft and cfg.ft.enable_ft_package:
-        fault_tolerance.setup(cfg, state)
-        fault_tolerance.maybe_setup_simulated_fault(cfg.ft)
-
-    # Set pytorch JIT layer fusion options and warmup JIT functions.
-    set_jit_fusion_options(cfg.model, cfg.train.micro_batch_size)
-
-    # Adjust the startup time so it reflects the largest value.
-    # This will be closer to what scheduler will see (outside of
-    # image ... launches.
-    start_time_tensor = torch.tensor(
-        [state.start_time], dtype=torch.double, device="cuda"
-    )
-    torch.distributed.all_reduce(start_time_tensor, op=torch.distributed.ReduceOp.MIN)
-    state.start_time = start_time_tensor.item()
-
-    print(
-        "time to initialize megatron (seconds): {:.3f}".format(
-            time.time() - state.start_time
-        )
-    )
-    torch.distributed.barrier()
-
-    # Context used for persisting some state between checkpoint saves.
-    checkpointing_context = init_checkpointing_context(cfg.checkpoint)
-
-    # Tokenizer
-    build_tokenizer(
-        cfg.tokenizer,
-        make_vocab_size_divisible_by=cfg.model.make_vocab_size_divisible_by
-        // cfg.model.tensor_model_parallel_size,
-        tensor_model_parallel_size=cfg.model.tensor_model_parallel_size,
-        trust_remote_code=True,
-    )
-    assert cfg.model.vocab_size, "vocab size must be specified in model config"
-
-    torch.distributed.barrier()
-
-    pre_wrap_hook = []
-    mixed_precision_wrapper = Float16Module
-
-    use_peft = policy_cfg["megatron_cfg"].get("peft", {}).get("enabled", False)
-
-    if policy_cfg["megatron_cfg"]["freeze_moe_router"]:
-        if use_peft:
-            raise ValueError(
-                "Freezing the MOE router is not currently supported when using PEFT"
-            )
-
-        def freeze_moe_router(megatron_model):
-            if not isinstance(megatron_model, list):
-                megatron_model = [megatron_model]
-            for model_module in megatron_model:
-                # Handle both wrapped (Float16Module) and unwrapped models
-                if isinstance(model_module, Float16Module):
-                    model_module = model_module.module
-                # Handle VLM models
-                if hasattr(model_module, "language_model"):
-                    model_module = model_module.language_model
-                for layer in model_module.decoder.layers:
-                    if hasattr(layer, "mlp") and hasattr(layer.mlp, "router"):
-                        layer.mlp.router.weight.requires_grad = False
-
-        mixed_precision_wrapper = CustomFloat16Module
-        pre_wrap_hook.extend([freeze_moe_router])
-
-    if use_peft:
-        peft_cfg = policy_cfg["megatron_cfg"].get("peft", {})
-        peft = LoRA(
-            target_modules=peft_cfg["target_modules"],
-            exclude_modules=peft_cfg["exclude_modules"],
-            dim=peft_cfg["dim"],
-            alpha=peft_cfg["alpha"],
-            dropout=peft_cfg["dropout"],
-            dropout_position=peft_cfg["dropout_position"],
-            lora_A_init_method=peft_cfg["lora_A_init_method"],
-            lora_B_init_method=peft_cfg["lora_B_init_method"],
-            a2a_experimental=peft_cfg["a2a_experimental"],
-            lora_dtype=peft_cfg["lora_dtype"],
-        )
-    else:
-        peft = None
-    cfg.peft = peft
-
-    if cfg.peft is not None:
-        pre_peft_hook = _create_peft_pre_wrap_hook(cfg, state)
-        cfg.model.register_pre_wrap_hook(pre_peft_hook)
-
-        def composed_peft_hook(model: list[MegatronModule]) -> list[MegatronModule]:
-            model = pre_peft_hook(model)
-            return model
-
-        pre_wrap_hook.extend([composed_peft_hook])
-
-    # Model, optimizer, and learning rate.
-    model = get_model(
-        cfg.model,
-        cfg.ddp,
-        use_torch_fsdp2=cfg.dist.use_torch_fsdp2,
-        overlap_param_gather_with_optimizer_step=cfg.optimizer.overlap_param_gather_with_optimizer_step,
-        data_parallel_random_init=cfg.rng.data_parallel_random_init,
-        pre_wrap_hook=pre_wrap_hook,
-        mixed_precision_wrapper=mixed_precision_wrapper,
-    )
-
-    if load_optimizer:
-        optimizer, scheduler = setup_optimizer(
-            optimizer_config=cfg.optimizer,
-            scheduler_config=cfg.scheduler,
-            model=model,
-            use_gloo_process_groups=cfg.dist.use_gloo_process_groups,
-        )
-    else:
-        optimizer = None
-        scheduler = None
-
-    print("Model, optimizer, and learning rate scheduler built")
-    torch.distributed.barrier()
-    if cfg.peft is not None:
-        should_load_checkpoint = cfg.checkpoint.load is not None and checkpoint_exists(
-            cfg.checkpoint.load
-        )
-        if should_load_checkpoint:
-            # The finetune toggle is explicitly set to True in order to avoid loading optimizer and RNG states
-            # This is switched off here in order to load these states from the checkpoint
-            cfg.checkpoint.finetune = False
-    else:
-        should_load_checkpoint = (
-            cfg.checkpoint.load is not None and checkpoint_exists(cfg.checkpoint.load)
-        ) or (
-            cfg.checkpoint.pretrained_checkpoint is not None
-            and checkpoint_exists(cfg.checkpoint.pretrained_checkpoint)
-        )
-
-    if should_load_checkpoint:
-        load_checkpoint(
-            state,
-            model,
-            optimizer,
-            scheduler,
-            checkpointing_context=checkpointing_context,
-            skip_load_to_model_and_opt=HAVE_FSDP2 and cfg.dist.use_torch_fsdp2,
-        )
-        print("Checkpoint loaded")
-    torch.distributed.barrier()
-
-    return state, model, optimizer, scheduler, checkpointing_context
-
-
-def destroy_parallel_state():
-    """Safely destroy parallel state and reset async call tracking.
-
-    This function is called during initialization to clean up temporary distributed
-    state from model import operations. Resetting async call tracking ensures that
-    when the main Megatron distributed context is created, all ranks start with
-    consistent call_idx values for async checkpointing.
-    """
-    if torch.distributed.is_initialized():
-        try:
-            torch.distributed.barrier()
-            torch.distributed.destroy_process_group()
-        except:
-            pass  # Ignore errors if already destroyed
-    if hasattr(parallel_state, "destroy_model_parallel"):
-        try:
-            parallel_state.destroy_model_parallel()
-        except:
-            pass  # Ignore errors if already destroyed
-
-    # Reset async calls queue to prevent call_idx mismatches after distributed context recreation
-    try:
-        import nemo.tron.utils.async_utils as nemo_async_utils
-        from megatron.core.dist_checkpointing.strategies.async_utils import (
-            AsyncCallsQueue,
-        )
-
-        # Clean up any existing async callers first
-        old_call_idx = getattr(nemo_async_utils._async_calls_queue, "call_idx", None)
-        num_unfinalized = (
-            nemo_async_utils._async_calls_queue.get_num_unfinalized_calls()
-        )
-        if num_unfinalized > 0:
-            print(
-                f"[WARNING] Resetting async calls queue with {num_unfinalized} unfinalized calls"
-            )
-        try:
-            nemo_async_utils._async_calls_queue.close()
-        except:
-            pass  # Ignore errors during cleanup
-        # Reset the global async calls queue by creating a new instance
-        nemo_async_utils._async_calls_queue = AsyncCallsQueue()
-        print(f"[DEBUG] Reset NeMo async calls queue (old call_idx: {old_call_idx})")
-    except ImportError:
-        pass
-
-    # Also reset the Megatron async calls queue if it exists
-    try:
-        import megatron.training.async_utils as megatron_async_utils
-        from megatron.core.dist_checkpointing.strategies.async_utils import (
-            AsyncCallsQueue,
-        )
-
-        # Clean up any existing async callers first
-        old_call_idx = getattr(
-            megatron_async_utils._async_calls_queue, "call_idx", None
-        )
-        num_unfinalized = (
-            megatron_async_utils._async_calls_queue.get_num_unfinalized_calls()
-        )
-        if num_unfinalized > 0:
-            print(
-                f"[WARNING] Resetting Megatron async calls queue with {num_unfinalized} unfinalized calls"
-            )
-        try:
-            megatron_async_utils._async_calls_queue.close()
-        except:
-            pass  # Ignore errors during cleanup
-        # Reset the Megatron global async calls queue as well
-        megatron_async_utils._async_calls_queue = AsyncCallsQueue()
-        print(
-            f"[DEBUG] Reset Megatron async calls queue (old call_idx: {old_call_idx})"
-        )
-    except ImportError:
-        pass
-
-    # Reset the third global async_calls instance in base strategy module
-    try:
-        import megatron.core.dist_checkpointing.strategies.base as base_strategy
-        from megatron.core.dist_checkpointing.strategies.async_utils import (
-            AsyncCallsQueue,
-        )
-
-        # Clean up and reset the global async_calls in base strategy
-        old_call_idx = getattr(base_strategy.async_calls, "call_idx", None)
-        num_unfinalized = base_strategy.async_calls.get_num_unfinalized_calls()
-        if num_unfinalized > 0:
-            print(
-                f"[WARNING] Resetting base strategy async_calls with {num_unfinalized} unfinalized calls"
-            )
-        try:
-            base_strategy.async_calls.close()
-        except:
-            pass
-        base_strategy.async_calls = AsyncCallsQueue()
-        print(f"[DEBUG] Reset base strategy async_calls (old call_idx: {old_call_idx})")
-    except ImportError:
-        pass
-
-
 @ray.remote(
     runtime_env=get_runtime_env_for_policy_worker("megatron_policy_worker")
 )  # pragma: no cover
@@ -509,471 +184,100 @@ def __init__(
         init_reference_model: bool = True,
         *,
         worker_sharding_annotations: NamedSharding,
-        pre_init_communication_queue: Queue,
         **kwargs: Any,
     ):
+        """Initialize the MegatronPolicyWorker."""
+        # Apply patch from https://github.com/NVIDIA/TransformerEngine/pull/2286/files
         apply_transformer_engine_patch()
 
-        self.is_generation_colocated = None
-        if "generation" in config and config["generation"] is not None:
-            self.is_generation_colocated = config["generation"]["colocated"]["enabled"]
-
-        # Explicitly set NCCL_CUMEM_ENABLE to 1 to avoid the P2P initialization error for PyNCCLCommunicator.
-        # See https://github.com/NVIDIA-NeMo/RL/issues/564 for more details.
-        if not self.is_generation_colocated:
-            os.environ["NCCL_CUMEM_ENABLE"] = "1"
-
         self.cfg = config
-        dtype_map = {
-            "float32": torch.float32,
-            "bfloat16": torch.bfloat16,
-            "float16": torch.float16,
-        }
-        self.dtype = dtype_map[self.cfg["precision"]]
-
-        self.optimizer_cpu_offload = self.cfg["megatron_cfg"]["optimizer"][
-            "optimizer_cpu_offload"
-        ]
-        self.offload_optimizer_for_logprob = self.cfg["offload_optimizer_for_logprob"]
-
-        # Reward models are not yet supported with Megatron.
-        if "reward_model_cfg" in self.cfg and self.cfg["reward_model_cfg"]["enabled"]:
-            raise NotImplementedError(
-                "Reward models are not yet supported with the Megatron backend, this issue is "
-                "tracked in https://github.com/NVIDIA-NeMo/RL/issues/720"
-            )
-
-        # Disable dynamo autotune_local_cache to avoid crash when there's already a cache
-        # with different order of node_bundles
-        configure_dynamo_cache()
-
-        # cfg["model_name"] is allowed to be either an HF model name or a path to an HF checkpoint
-        # check if hf_model_name is a path
-        hf_model_name = self.cfg["model_name"]
-        # Check if the checkpoint already exists
-        hf_model_subdir = hf_model_name
-        if os.path.exists(hf_model_name):
-            hf_model_subdir = f"model_{hf_model_subdir.replace('/', '_')}"
-
-        pretrained_path = f"{get_megatron_checkpoint_dir()}/{hf_model_subdir}"
-        pt_checkpoint_exists = os.path.exists(pretrained_path) and os.path.exists(
-            os.path.join(pretrained_path, "iter_0000000")
-        )
-
-        # Ensure clean slate before import
-        destroy_parallel_state()
 
-        # Set for rank for non-collocated to check which ranks to broadcast from
+        # Set rank for non-colocated to check which ranks to broadcast from
         self.rank = get_rank_safe()
-        # Need to initialize the process group before calling into Megatron-Bridge, otherwise Megatron-Bridge will try to set an incorrect device
-        torch.distributed.init_process_group("nccl")
-        if pt_checkpoint_exists:
-            print(f"Checkpoint already exists at {pretrained_path}. Skipping import.")
-        else:
-            hf_config_overrides = self.cfg.get("hf_config_overrides", {}) or {}
-            import_model_from_hf_name(
-                hf_model_name,
-                pretrained_path,
-                self.cfg["megatron_cfg"],
-                **hf_config_overrides,
-            )
 
-            if parallel_state.model_parallel_is_initialized():
-                print("Reinitializing model parallel after loading model state.")
-                parallel_state.destroy_model_parallel()
+        # Step 1: Setup distributed
+        setup_distributed()
 
-        pretrained_run_config = os.path.join(
-            pretrained_path, "iter_0000000/run_config.yaml"
+        # Step 2: Validate and setup model paths
+        hf_model_name, pretrained_path, pt_checkpoint_exists = validate_model_paths(
+            config
+        )
+        # Handle model import if needed
+        handle_model_import(
+            config, hf_model_name, pretrained_path, pt_checkpoint_exists
         )
 
+        # Store tokenizer
         self.tokenizer = tokenizer
         if self.tokenizer.pad_token is None:
             self.tokenizer.pad_token = self.tokenizer.eos_token
 
-        if not os.path.exists(pretrained_run_config):
-            raise FileNotFoundError(
-                f"Pretrained run config not found at {pretrained_run_config} on rank={get_rank_safe()}. This usually means that the one-time HF->mcore conversion on rank=0 saved to a directory not being mounted on this node. Please check "
-            )
-
-        try:
-            cfg_from_pretrained = ConfigContainer.from_yaml(
-                pretrained_run_config, mode=InstantiationMode.STRICT
-            )
-        except Exception as e:
-            # Add helpful context as a note to the exception
-            e.add_note(
-                f"\n{'=' * 80}\n"
-                f"NOTE: A common cause of this error is when the HF->mcore converted checkpoint is\n"
-                f"created with an older version of megatron-bridge.\n"
-                f"If this checkpoint is old or was generated by a different code version,\n"
-                f"try deleting it and rerunning the code.\n"
-                f"The checkpoint will be automatically regenerated with the current version.\n\n"
-                f"Checkpoint location: {pretrained_path}\n"
-                f"{'=' * 80}"
-            )
-            raise
-        model_cfg = cfg_from_pretrained.model
-        cfg_from_pretrained.logger = LoggerConfig()
-
-        model_cfg.tensor_model_parallel_size = self.cfg["megatron_cfg"][
-            "tensor_model_parallel_size"
-        ]
-        model_cfg.pipeline_model_parallel_size = self.cfg["megatron_cfg"][
-            "pipeline_model_parallel_size"
-        ]
-        model_cfg.num_layers_in_first_pipeline_stage = self.cfg["megatron_cfg"][
-            "num_layers_in_first_pipeline_stage"
-        ]
-        model_cfg.num_layers_in_last_pipeline_stage = self.cfg["megatron_cfg"][
-            "num_layers_in_last_pipeline_stage"
-        ]
-        model_cfg.sequence_parallel = self.cfg["megatron_cfg"]["sequence_parallel"]
-        model_cfg.context_parallel_size = self.cfg["megatron_cfg"][
-            "context_parallel_size"
-        ]
-        if model_cfg.context_parallel_size > 1:
-            assert self.cfg["sequence_packing"]["enabled"], (
-                "Sequence Packing must be enabled to use Context Parallelism with MCore"
-            )
-        model_cfg.expert_tensor_parallel_size = self.cfg["megatron_cfg"][
-            "expert_tensor_parallel_size"
-        ]
-        model_cfg.expert_model_parallel_size = self.cfg["megatron_cfg"][
-            "expert_model_parallel_size"
-        ]
-
-        # Setting moe_router_dtype to higher precision (e.g. fp64) can improve numerical stability,
-        # especially when using many experts.
-        model_cfg.moe_router_dtype = self.cfg["megatron_cfg"]["moe_router_dtype"]
-
-        # The below two configs (and "freeze_moe_router") are used to stabilize moe training
-        # by preventing updates to the moe router. We found that this is helpful in reducing
-        # logprob error during training.
-
-        # Set this to "none" to disable load balancing loss.
-        model_cfg.moe_router_load_balancing_type = self.cfg["megatron_cfg"][
-            "moe_router_load_balancing_type"
-        ]
-        # Set this to 0.0 to disable updates to the moe router expert bias
-        model_cfg.moe_router_bias_update_rate = self.cfg["megatron_cfg"][
-            "moe_router_bias_update_rate"
-        ]
+        # Step 3: Setup model configuration
+        runtime_config = validate_and_set_config(
+            config,
+            self.rank,
+            hf_model_name,
+            pretrained_path,
+            weights_path,
+            tokenizer,
+        )
 
-        model_cfg.moe_permute_fusion = self.cfg["megatron_cfg"]["moe_permute_fusion"]
-        if "layernorm_epsilon" in self.cfg["megatron_cfg"]:
-            model_cfg.layernorm_epsilon = self.cfg["megatron_cfg"]["layernorm_epsilon"]
-
-        model_cfg.sequence_parallel = self.cfg["megatron_cfg"]["sequence_parallel"]
-        model_cfg.bf16 = self.dtype == torch.bfloat16
-        model_cfg.fp16 = self.dtype == torch.float16
-        if model_cfg.fp16:
-            assert not model_cfg.bf16, "fp16 and bf16 cannot be used together"
-            model_cfg.params_dtype = torch.float16
-        elif model_cfg.bf16:
-            assert not model_cfg.fp16, "fp16 and bf16 cannot be used together"
-            model_cfg.params_dtype = torch.bfloat16
-        else:
-            model_cfg.params_dtype = torch.float32
-        model_cfg.pipeline_dtype = dtype_map[self.cfg["megatron_cfg"]["pipeline_dtype"]]
-        model_cfg.parallel_output = True
-        if self.cfg["megatron_cfg"]["activation_checkpointing"]:
-            model_cfg.recompute_granularity = "full"
-            model_cfg.recompute_method = "uniform"
-            model_cfg.recompute_num_layers = 1
-        if not model_cfg.gated_linear_unit:
-            assert model_cfg.activation_func is not None, (
-                "activation_func must be set if not using gated_linear_unit. This likely "
-                "indicates an issue in configuration conversion (e.g. activation func was "
-                "a lambda and couldn't be serialized). This is based on this check "
-                "https://github.com/NVIDIA/Megatron-LM/blob/1ab876ddc4c1893c76f26d775226a8d1dcdfb3d2/megatron/core/transformer/mlp.py#L174."
-            )
-        model_cfg.apply_rope_fusion = self.cfg["megatron_cfg"]["apply_rope_fusion"]
-        model_cfg.bias_activation_fusion = self.cfg["megatron_cfg"][
-            "bias_activation_fusion"
-        ]
-        fp8_cfg = self.cfg["megatron_cfg"].get("fp8_cfg", None)
-        self.fp8_cfg = fp8_cfg
-        if fp8_cfg is not None and fp8_cfg.get("enabled", False):
-            try:
-                model_cfg.fp8 = fp8_cfg["fp8"]
-                model_cfg.fp8_recipe = fp8_cfg["fp8_recipe"]
-                model_cfg.fp8_param = fp8_cfg["fp8_param"]
-            except KeyError as e:
-                raise KeyError(f"Missing key in fp8_cfg: {e}")
-            if model_cfg.fp8_param:
-                warnings.warn(
-                    "Setting fp8_param=True sometimes causes NaN token_mult_prob_error, please use with caution. "
-                    "Refer to https://github.com/NVIDIA-NeMo/RL/issues/1164 for latest updates with this issue."
-                )
+        self.megatron_cfg = runtime_config.megatron_cfg
+        self.dtype = runtime_config.dtype
+        self.optimizer_cpu_offload = runtime_config.optimizer_cpu_offload
+        self.offload_optimizer_for_logprob = (
+            runtime_config.offload_optimizer_for_logprob
+        )
+        self.is_generation_colocated = runtime_config.is_generation_colocated
+        self.final_padded_vocab_size = runtime_config.final_padded_vocab_size
 
-        optimizer_cpu_offload = self.cfg["megatron_cfg"]["optimizer"][
-            "optimizer_cpu_offload"
-        ]
-        optimizer_offload_fraction = self.cfg["megatron_cfg"]["optimizer"][
-            "optimizer_offload_fraction"
-        ]
-        if optimizer_cpu_offload:
-            # Currently, hybrid optimizer (partly on GPU and partly on CPU) is not supported because it conflicts with the way
-            # Nemo-rl handles the optimizer offload/onload between generation and training. So if using CPU optimizer the offload_fraction should be 1.0.
-            assert optimizer_offload_fraction == 1.0, (
-                "Currently for optimizer offloading, only optimizer_offload_fraction=1.0 is supported"
-            )
-        if (
-            "logprob_chunk_size" in self.cfg
-            and self.cfg["logprob_chunk_size"] is not None
-            and self.cfg["logprob_chunk_size"] > 0
-        ):
-            assert self.cfg["megatron_cfg"]["defer_fp32_logits"], (
-                "defer_fp32_logits must be True if logprob_chunk_size is set"
-            )
         self.defer_fp32_logits = self.cfg["megatron_cfg"].get(
             "defer_fp32_logits", None
-        ) and (model_cfg.fp16 or model_cfg.bf16)
-
-        checkpoint_config = CheckpointConfig(
-            save_interval=100,
-            save=weights_path,
-            load=weights_path,
-            pretrained_checkpoint=pretrained_path,  # This is the path to the pretrained ckpt for the SFT case
-            async_save=False,  # This doesn't work right now.
-            fully_parallel_save=True,
-            fully_parallel_load=True,  # Enable fully parallel load
-            load_rng=False,
-        )
-        ref_checkpoint_config = CheckpointConfig(
-            pretrained_checkpoint=pretrained_path,  # This is the path to the pretrained ckpt for the SFT case
-            save=None,
-            load=None,
-            fully_parallel_load=True,  # Enable fully parallel load
-            load_rng=False,
-        )
+        ) and (runtime_config.model_cfg.fp16 or runtime_config.model_cfg.bf16)
 
-        assert "train_iters" in self.cfg["megatron_cfg"], (
-            "train_iters must be set in megatron_cfg. For an example, see "
-            "https://github.com/NVIDIA-NeMo/RL/blob/bccbc377705a81a1f4b3c31ad9767bcc15f735a8/nemo_rl/algorithms/sft.py#L175-L179."
-        )
+        # Store FP8 config for later use
+        self.fp8_cfg = config["megatron_cfg"].get("fp8_cfg", None)
 
-        ## These settings are required for correct gradient computations in mcore
-        ## when calculate_per_token_loss is True, there is no scaling of the gradient in mcore,
-        ## so we handle the scaling in nemo-rl.
-        ## perform_initialization = True is a workaround to ensure the correct tensor parallel attributes are set
-        ## on the TP-sharded parameters.
-        model_cfg.calculate_per_token_loss = True
-        model_cfg.perform_initialization = True
-
-        assert (
-            "aux_loss" not in model_cfg.moe_router_load_balancing_type
-            or model_cfg.moe_aux_loss_coeff == 0
-        ), (
-            "MoE aux loss is currently not supported due to a known bug in Megatron-LM. "
-            "See https://github.com/NVIDIA/Megatron-LM/issues/1984 for more details."
-        )
-
-        self.megatron_cfg = ConfigContainer(
-            model=model_cfg,
-            checkpoint=checkpoint_config,
-            logger=LoggerConfig(logging_level=0),
-            train=TrainingConfig(
-                micro_batch_size=1,  # ignored
-                global_batch_size=self.cfg["train_global_batch_size"],  # ignored
-                train_iters=self.cfg["megatron_cfg"][
-                    "train_iters"
-                ],  # Set by algorithm setup
-            ),
-            optimizer=OptimizerConfig(
-                **self.cfg["megatron_cfg"]["optimizer"],
-            ),
-            ddp=DistributedDataParallelConfig(
-                check_for_nan_in_grad=True,
-                grad_reduce_in_fp32=self.cfg["megatron_cfg"][
-                    "distributed_data_parallel_config"
-                ]["grad_reduce_in_fp32"],
-                overlap_grad_reduce=self.cfg["megatron_cfg"][
-                    "distributed_data_parallel_config"
-                ]["overlap_grad_reduce"],
-                overlap_param_gather=self.cfg["megatron_cfg"][
-                    "distributed_data_parallel_config"
-                ]["overlap_param_gather"],
-                # we need to set average_in_collective=False with calculate_per_token_loss=True.
-                # otherwise, mcore throws an assertion error.
-                average_in_collective=False,
-                use_distributed_optimizer=self.cfg["megatron_cfg"]["optimizer"][
-                    "use_distributed_optimizer"
-                ],
-                data_parallel_sharding_strategy=self.cfg["megatron_cfg"][
-                    "distributed_data_parallel_config"
-                ]["data_parallel_sharding_strategy"],
-            ),
-            scheduler=SchedulerConfig(
-                **self.cfg["megatron_cfg"]["scheduler"],
-            ),
-            dataset=None,
-            tokenizer=TokenizerConfig(
-                tokenizer_type="HuggingFaceTokenizer",
-                tokenizer_model=hf_model_name,
-            ),
-        )
-        # TODO: this validation should happen inside mbridge: https://github.com/NVIDIA-NeMo/Megatron-Bridge/issues/1665
-        if self.dtype == torch.bfloat16:
-            assert self.megatron_cfg.model.bf16 == True, (
-                "policy.megatron_cfg.model.bf16=True must be set if policy.precision=bfloat16. This is handled by nemo-rl so this indicates something is misconfigured."
-            )
-            assert (
-                self.megatron_cfg.optimizer.use_precision_aware_optimizer == False
-                or self.megatron_cfg.optimizer.bf16 == True
-            ), (
-                "policy.megatron_cfg.optimizer.bf16=True must be set if policy.precision=bfloat16 when using use_precision_aware_optimizer=True"
-            )
-        elif self.dtype == torch.float16:
-            assert self.megatron_cfg.model.fp16 == True, (
-                "policy.megatron_cfg.model.fp16=True must be set if policy.precision=float16. This is handled by nemo-rl so this indicates something is misconfigured."
-            )
-            assert (
-                self.megatron_cfg.optimizer.use_precision_aware_optimizer == False
-                or self.megatron_cfg.optimizer.fp16 == True
-            ), (
-                "policy.megatron_cfg.optimizer.fp16=True must be set if policy.precision=float16 when using use_precision_aware_optimizer=True"
-            )
-        elif self.dtype == torch.float32:
-            assert (
-                self.megatron_cfg.model.bf16 == False
-                and self.megatron_cfg.model.fp16 == False
-            ), (
-                "policy.megatron_cfg.model.bf16=False and policy.megatron_cfg.model.fp16=False must be set if policy.precision=float32. This is handled by nemo-rl so this indicates something is misconfigured."
-            )
-            assert (
-                self.megatron_cfg.optimizer.bf16 == False
-                and self.megatron_cfg.optimizer.fp16 == False
-            ), (
-                "policy.megatron_cfg.optimizer.bf16=False and policy.megatron_cfg.optimizer.fp16=False must be set if policy.precision=float32"
-            )
+        # Validate configuration
         self.megatron_cfg.validate()
-        (
-            self.mcore_state,
-            self.model,
-            self.optimizer,
-            self.scheduler,
-            self.checkpointing_context,
-        ) = setup_megatron_model(
-            policy_cfg=self.cfg, cfg=self.megatron_cfg, load_optimizer=init_optimizer
+
+        # Step 4: Setup Megatron model and components
+        model_and_optimizer_state = setup_model_and_optimizer(
+            config, self.megatron_cfg, init_optimizer
         )
 
-        # Set the param sync function for the model
-        if (
-            self.megatron_cfg.ddp.overlap_param_gather
-            and self.megatron_cfg.ddp.align_param_gather
-        ):
-            self.megatron_cfg.param_sync_func = [
-                model_chunk.start_param_sync for model_chunk in self.model
-            ]
-            if len(self.model) == 1:
-                self.megatron_cfg.param_sync_func = self.megatron_cfg.param_sync_func[0]
+        self.mcore_state = model_and_optimizer_state.state
+        self.model = model_and_optimizer_state.model
+        self.optimizer = model_and_optimizer_state.optimizer
+        self.scheduler = model_and_optimizer_state.scheduler
+        self.checkpointing_context = model_and_optimizer_state.checkpointing_context
+        param_sync_func = model_and_optimizer_state.param_sync_func
 
-        self.model = self.model[0]  # Get the first model from the list
+        # Set the param sync function for the model if needed
+        if param_sync_func is not None:
+            self.megatron_cfg.param_sync_func = param_sync_func
 
+        # Step 5: Setup reference model if needed
         if init_reference_model:
             self.model = self.move_model(self.model, "cpu")
-            ref_ckpt_context = init_checkpointing_context(ref_checkpoint_config)
-
-            # Create a separate megatron config for the reference model with the correct checkpoint config
-            ref_megatron_cfg = ConfigContainer(
-                model=self.megatron_cfg.model,
-                checkpoint=ref_checkpoint_config,  # Use the reference checkpoint config
-                logger=self.megatron_cfg.logger,
-                train=self.megatron_cfg.train,
-                optimizer=self.megatron_cfg.optimizer,
-                ddp=self.megatron_cfg.ddp,
-                scheduler=self.megatron_cfg.scheduler,
-                dataset=self.megatron_cfg.dataset,
-                tokenizer=self.megatron_cfg.tokenizer,
-            )
-
-            # Create a separate state object for the reference model
-            ref_state = GlobalState()
-            ref_state.cfg = ref_megatron_cfg
-
-            # Configure mixed precision wrapper for reference model
-            ref_mixed_precision_wrapper = Float16Module
-            if self.cfg["megatron_cfg"].get("freeze_moe_router", False):
-                ref_mixed_precision_wrapper = CustomFloat16Module
-
-            reference_model = get_model(
-                self.megatron_cfg.model,
-                self.megatron_cfg.ddp,
-                use_torch_fsdp2=self.megatron_cfg.dist.use_torch_fsdp2,
-                overlap_param_gather_with_optimizer_step=self.megatron_cfg.optimizer.overlap_param_gather_with_optimizer_step,
-                pre_wrap_hook=self.megatron_cfg.rng.data_parallel_random_init,
-                mixed_precision_wrapper=ref_mixed_precision_wrapper,
+            self.reference_state_dict = setup_reference_model_state(
+                config, self.megatron_cfg, pretrained_path
             )
-            print("Loading the Reference Model")
-            if (
-                ref_checkpoint_config.pretrained_checkpoint is not None
-                and checkpoint_exists(ref_checkpoint_config.pretrained_checkpoint)
-            ):
-                load_checkpoint(
-                    ref_state,  # Use the separate state object with ref checkpoint config
-                    reference_model,
-                    None,  # no optimizer
-                    None,  # no scheduler
-                    checkpointing_context=ref_ckpt_context,
-                    skip_load_to_model_and_opt=HAVE_FSDP2
-                    and self.megatron_cfg.dist.use_torch_fsdp2,
-                )
-                reference_model = reference_model[0]
-                reference_model.eval()
-                self.reference_state_dict = {}
-                for name, item in reference_model.state_dict().items():
-                    if isinstance(item, torch.Tensor):
-                        cpu_item = item.detach().to(
-                            device="cpu", non_blocking=True, copy=True
-                        )
-                        del item
-                    else:
-                        cpu_item = item
-                    self.reference_state_dict[name] = cpu_item
-                print("Reference model loaded")
-            else:
-                print("Reference model not loaded")
-
             self.model = self.move_model(self.model, "cuda")
 
-        _update_model_config_funcs(
-            [self.model],
-            self.megatron_cfg.model,
-            self.megatron_cfg.ddp,
+        # Step 6: Finalize setup
+        (
+            self.megatron_tokenizer,
+            self.megatron_bridge,
+            self.should_disable_forward_pre_hook,
+            self.dp_size,
+        ) = finalize_megatron_setup(
+            config,
+            self.megatron_cfg,
+            hf_model_name,
+            worker_sharding_annotations,
+            self.model,
             self.optimizer,
-            align_grad_reduce=self.megatron_cfg.dist.align_grad_reduce,
-        )
-
-        tokenizer_config = TokenizerConfig(
-            tokenizer_type="HuggingFaceTokenizer",
-            tokenizer_model=hf_model_name,
-        )
-
-        self.megatron_tokenizer = build_tokenizer(
-            tokenizer_config,
-            make_vocab_size_divisible_by=self.megatron_cfg.model.make_vocab_size_divisible_by
-            // self.cfg["megatron_cfg"]["tensor_model_parallel_size"],
-            tensor_model_parallel_size=self.cfg["megatron_cfg"][
-                "tensor_model_parallel_size"
-            ],
-            trust_remote_code=True,
-        )
-        self.final_padded_vocab_size = calculate_padded_vocab_size(
-            self.megatron_cfg.model.vocab_size,
-            self.megatron_cfg.model.make_vocab_size_divisible_by,
-            self.cfg["megatron_cfg"]["tensor_model_parallel_size"],
-        )
-        self.dp_size = worker_sharding_annotations.get_axis_size("data_parallel")
-        self.megatron_bridge = AutoBridge.from_hf_pretrained(
-            hf_model_name, trust_remote_code=True
-        )
-
-        self.should_disable_forward_pre_hook = (
-            self.cfg["megatron_cfg"]["optimizer"]["use_distributed_optimizer"]
-            and self.cfg["megatron_cfg"]["distributed_data_parallel_config"][
-                "overlap_param_gather"
-            ]
         )
 
         # vars used for refit
@@ -982,9 +286,7 @@ def __init__(
         # [(mcore_param_name, estimated_memory), ...]
         # Note: here param name is local param name, with local layer number and
         # local expert id etc.
-        self.refit_conversion_tasks = (
-            None  # Meta data for conversion params from megatron bridge
-        )
+        self.refit_conversion_tasks = None
         self.refit_conversion_tasks_current_index = None
         self.refit_param_info_mcore = None
 
@@ -1171,18 +473,20 @@ def train(
                 else:
                     update_successful, grad_norm, num_zeros_in_grad = (True, 0.0, 0.0)
 
+                pg_collection = get_pg_collection(self.model)
+
                 # when freezing sub-models we may have a mixture of successful and unsucessful ranks,
                 # so we must gather across mp ranks
                 update_successful = logical_and_across_model_parallel_group(
-                    update_successful
+                    update_successful, mp_group=pg_collection.mp
                 )
                 # grad_norm and num_zeros_in_grad will be None on ranks without trainable params,
                 # so we must gather across mp ranks
                 grad_norm: float = reduce_max_stat_across_model_parallel_group(
-                    grad_norm
+                    grad_norm, mp_group=pg_collection.mp
                 )
                 num_zeros_in_grad: float = reduce_max_stat_across_model_parallel_group(
-                    num_zeros_in_grad
+                    num_zeros_in_grad, mp_group=pg_collection.mp
                 )
 
                 if update_successful:
@@ -2715,40 +2019,3 @@ def _percentile(values: list[float], p: float) -> float:
                 final_result = obj_list[0]  # type: ignore
 
         return final_result
-
-
-class CustomFloat16Module(Float16Module):
-    """Float 16 Module.
-
-    Attributes:
-        config (TransformerConfig): Transformer config
-        fp16 (bool) : Specifies if the model runs in fp16 mode
-        bf16 (bool) : Specifies if the model runs in bf16 mode
-
-    Args:
-        config (TransformerConfig): The transformer config used to initalize the model
-    """
-
-    def __init__(self, config: TransformerConfig, module: torch.nn.Module):
-        super(CustomFloat16Module, self).__init__(config, module)
-        self.re_enable_float32_expert_bias()
-
-    def re_enable_float32_expert_bias(self) -> None:
-        """Ensure MoE router expert bias stays in float32 for numerical stability.
-
-        Walks the wrapped module to find MoE routers and invokes the
-        `_maintain_float32_expert_bias()` helper which recreates or casts the
-        expert bias tensors to float32 as required by Megatron-LM.
-        """
-        module = self.module
-        # Handle VLM models where language model is nested
-        if hasattr(module, "language_model"):
-            module = module.language_model
-        if hasattr(module, "decoder") and hasattr(module.decoder, "layers"):
-            for layer in module.decoder.layers:
-                mlp = getattr(layer, "mlp", None)
-                router = getattr(mlp, "router", None) if mlp is not None else None
-                if router is not None and hasattr(
-                    router, "_maintain_float32_expert_bias"
-                ):
-                    router._maintain_float32_expert_bias()
diff --git a/nemo_rl/utils/logger.py b/nemo_rl/utils/logger.py
index f329dd70c7..f8e9ad0c6f 100644
--- a/nemo_rl/utils/logger.py
+++ b/nemo_rl/utils/logger.py
@@ -99,6 +99,7 @@ def log_metrics(
         step: int,
         prefix: Optional[str] = "",
         step_metric: Optional[str] = None,
+        step_finished: bool = False,
     ) -> None:
         """Log a dictionary of metrics."""
         pass
@@ -144,6 +145,7 @@ def log_metrics(
         step: int,
         prefix: Optional[str] = "",
         step_metric: Optional[str] = None,  # ignored in TensorBoard
+        step_finished: bool = False,  # ignored in TensorBoard
     ) -> None:
         """Log metrics to Tensorboard.
 
@@ -199,6 +201,14 @@ class WandbLogger(LoggerInterface):
 
     def __init__(self, cfg: WandbConfig, log_dir: Optional[str] = None):
         self.run = wandb.init(**cfg, dir=log_dir)
+
+        if os.environ.get("RAY_BACKEND_LOG_LEVEL", "").lower() == "debug":
+            print(
+                "Uploading raylet.out and raylet.err files to W&B since environment variable RAY_BACKEND_LOG_LEVEL=debug"
+            )
+            wandb.save("/tmp/ray/session_latest/logs/raylet.out", policy="live")
+            wandb.save("/tmp/ray/session_latest/logs/raylet.err", policy="live")
+
         self._log_code()
         self._log_diffs()
         print(
@@ -332,6 +342,7 @@ def log_metrics(
         step: int,
         prefix: Optional[str] = "",
         step_metric: Optional[str] = None,
+        step_finished: bool = False,
     ) -> None:
         """Log metrics to wandb.
 
@@ -352,6 +363,10 @@ def log_metrics(
         if step_metric and step_metric in metrics:
             # commit=False so the step does not get incremented
             self.run.log(metrics, commit=False)
+        elif step_finished:
+            # Commit param defaults to None. By default if step is set, then commit defaults to False
+            # Here, we have an explicit fork for commit in case W&B ever decides to change their default logic.
+            self.run.log(metrics, step=step, commit=True)
         else:
             self.run.log(metrics, step=step)
 
@@ -404,6 +419,7 @@ def log_metrics(
         step: int,
         prefix: Optional[str] = "",
         step_metric: Optional[str] = None,
+        step_finished: bool = False,
     ) -> None:
         """Log metrics to the associated Swanlab run.
 
@@ -781,6 +797,7 @@ def log_metrics(
         step: int,
         prefix: Optional[str] = "",
         step_metric: Optional[str] = None,
+        step_finished: bool = False,
     ) -> None:
         """Log metrics to MLflow.
 
@@ -906,6 +923,7 @@ def log_metrics(
         step: int,
         prefix: Optional[str] = "",
         step_metric: Optional[str] = None,
+        step_finished: bool = False,
     ) -> None:
         """Log metrics to all enabled backends.
 
@@ -917,7 +935,7 @@ def log_metrics(
                          of the provided step value (currently only needed for wandb)
         """
         for logger in self.loggers:
-            logger.log_metrics(metrics, step, prefix, step_metric)
+            logger.log_metrics(metrics, step, prefix, step_metric, step_finished)
 
     def log_hyperparams(self, params: Mapping[str, Any]) -> None:
         """Log hyperparameters to all enabled backends.
@@ -954,6 +972,24 @@ def log_batched_dict_as_jsonl(
 
         print(f"Logged data to {filepath}")
 
+    def log_string_list_as_jsonl(self, to_log: list[str], filename: str) -> None:
+        """Append strings to a JSONL file in the log directory, one per line.
+
+        Args:
+            to_log: strings written verbatim (not re-serialized), each plus "\n"
+            filename: Filename to log to (within the log directory)
+        """
+        # Create full path within log directory
+        filepath = os.path.join(self.base_log_dir, filename)
+        os.makedirs(os.path.dirname(filepath), exist_ok=True)
+
+        # Append as UTF-8 so the output does not depend on the locale encoding
+        with open(filepath, "a", encoding="utf-8") as f:
+            for sample in to_log:
+                f.write(sample + "\n")
+
+        print(f"Logged data to {filepath}")
+
     def log_plot_per_worker_timeline_metrics(
         self,
         metrics: dict[int, list[Any]],
diff --git a/nemo_rl/utils/memory_tracker.py b/nemo_rl/utils/memory_tracker.py
new file mode 100644
index 0000000000..be55426205
--- /dev/null
+++ b/nemo_rl/utils/memory_tracker.py
@@ -0,0 +1,116 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+from typing import List, Optional
+
+from psutil import Process
+from pydantic import BaseModel, Field
+from ray.scripts.scripts import memory_summary
+
+
+class MemoryTrackerDataPoint(BaseModel):
+    """Driver memory usage and variable names recorded around one stage.
+
+    The ``*_before_stage`` fields are required at construction; the
+    ``*_after_stage`` fields default to ``None`` until they are filled in.
+    """
+
+    stage: str
+    memory_used_before_stage_gb: float
+    variables_before_stage: List[str]
+
+    memory_used_after_stage_gb: Optional[float] = None
+    variables_after_stage: Optional[List[str]] = None
+
+    def _require_after_stage(self) -> None:
+        """Raise ``ValueError`` unless both after-stage fields are recorded."""
+        if (
+            self.memory_used_after_stage_gb is None
+            or self.variables_after_stage is None
+        ):
+            raise ValueError(
+                f"Stage {self.stage!r} has no after-stage snapshot recorded yet"
+            )
+
+    @property
+    def mem_used_diff_gb(self) -> float:
+        """Memory change over the stage in GB (after - before)."""
+        self._require_after_stage()
+        return self.memory_used_after_stage_gb - self.memory_used_before_stage_gb
+
+    @property
+    def new_variables(self) -> List[str]:
+        """Variable names present after the stage but not before it."""
+        self._require_after_stage()
+        seen_before = set(self.variables_before_stage)
+        return [v for v in self.variables_after_stage if v not in seen_before]
+
+    def get_snapshot_str(self) -> str:
+        """Format the before/after report followed by Ray's memory summary.
+
+        Raises:
+            ValueError: if the after-stage fields have not been recorded.
+        """
+        # Fail before querying Ray so an unfinished stage gives a clear error.
+        self._require_after_stage()
+        ray_memory_summary = memory_summary(stats_only=True, num_entries=5)
+        return f"""💭 Driver CPU memory tracker for {self.stage}:
+- Mem usage before                  {self.memory_used_before_stage_gb:>7.2f} GB
+- Mem usage after                   {self.memory_used_after_stage_gb:>7.2f} GB
+- Mem usage diff (after - before)   {self.mem_used_diff_gb:>+7.2f} GB
+- New variables: {self.new_variables}
+
+⚡️ Ray memory snapshot:
+{ray_memory_summary}"""
+
+
+class MemoryTracker(BaseModel):
+    """Records this process's RSS at stage boundaries as a list of data points."""
+
+    data_points: List[MemoryTrackerDataPoint] = Field(default_factory=list)
+
+    def model_post_init(self, context):
+        # psutil handle for the current process; its RSS is read at each snapshot.
+        self._process = Process(os.getpid())
+        return super().model_post_init(context)
+
+    def snapshot_start_of_stage(
+        self, new_stage: str, all_current_variables: List[str]
+    ) -> None:
+        """Close the previous stage, if any, and start tracking ``new_stage``.
+
+        The current RSS becomes the "after" value of the previous data point
+        (whose report is then printed) and the "before" value of the new one.
+
+        Args:
+            new_stage: name of the stage that starts now.
+            all_current_variables: variable names supplied by the caller.
+        """
+        mem_info = self._process.memory_info()
+        current_mem_used_gb: float = mem_info.rss / (1024**3)
+
+        if self.data_points:
+            last_data_point = self.data_points[-1]
+            last_data_point.memory_used_after_stage_gb = current_mem_used_gb
+            last_data_point.variables_after_stage = all_current_variables
+
+            print(last_data_point.get_snapshot_str())
+
+        self.data_points.append(
+            MemoryTrackerDataPoint(
+                stage=new_stage,
+                memory_used_before_stage_gb=current_mem_used_gb,
+                variables_before_stage=all_current_variables,
+            )
+        )
diff --git a/pyproject.toml b/pyproject.toml
index 87198f1e92..60e3bbc377 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,5 +1,5 @@
 [build-system]
-requires = ["setuptools>=42", "wheel"]
+requires = ["setuptools>=42", "wheel>=0.46.2"]
 build-backend = "setuptools.build_meta"
 
 [tool.setuptools]
@@ -83,6 +83,26 @@ vllm = [
   "vllm==0.11.2",
   "num2words>=0.5.14",
 ]
+sglang = [
+  "sglang==0.5.7",
+  "pybase64",
+  "orjson",
+  "uvloop",
+  "requests",
+  "openai",
+  "partial-json-parser",
+  "sentencepiece",
+  "sgl-kernel",
+  "compressed-tensors",
+  "msgspec",
+  "python-multipart",
+  "torchao",
+  "xgrammar",
+  "interegular",
+  "openai-harmony",
+  "torch-memory-saver",
+  "einops",
+]
 mcore = [
   # also need cudnn (https://developer.nvidia.com/cudnn-downloads?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=20.04&target_type=deb_network)
   # wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb
@@ -100,6 +120,7 @@ mcore = [
   # https://github.com/NVIDIA/TransformerEngine/blob/v2.3/transformer_engine/pytorch/attention/dot_product_attention/utils.py#L108
   # https://github.com/facebookresearch/xformers/blob/8354497deb2c04c67fbb2e2ad911e86530da0e90/xformers/ops/fmha/flash.py#L76
   "flash-attn==2.8.1",
+  "deep_ep @ git+https://github.com/deepseek-ai/DeepEP.git@bfded34800dfec415b71503f8205181de90b2480",
   # Remove this once https://github.com/NVIDIA-NeMo/RL/issues/501 resolved
   "vllm==0.11.2",
 ]
@@ -172,13 +193,14 @@ triton = [
 causal-conv1d = { git = "https://github.com/Dao-AILab/causal-conv1d", rev = "67e0a9dfe1518fc0036444e9ab5fe06ab78299e0" }
 mamba-ssm = { git = "https://github.com/state-spaces/mamba.git", rev = "d68d16ed7d5d5164eb5a57c0285f3b7eb8394ec1" }
 nv-grouped-gemm = { git = "https://github.com/fanshiqing/grouped_gemm", tag = "v1.1.4.post7" }
+sgl-kernel = { git = "https://github.com/sgl-project/sglang", tag = "v0.5.7", subdirectory = "sgl-kernel" }
 
 [tool.uv.workspace]
 members = [
   "3rdparty/Megatron-LM-workspace",
   "3rdparty/Automodel-workspace/Automodel",
   "3rdparty/Megatron-Bridge-workspace",
-  "3rdparty/Gym-workspace",
+  "3rdparty/Gym-workspace/Gym",
   # Research projects are also added here in order for them to share the global root level uv.lock.
   # If we don't do this, the research projects do not see the global uv.lock, and may mistakenly
   # install numpy>=2.0 because nemo-rl's core [dependencies] do not pin numpy, but when you inspect
@@ -199,6 +221,7 @@ explicit = true
 
 [tool.uv]
 preview = true # Enable preview features like extra-build-dependencies
+extra-build-variables = { sgl-kernel = { CMAKE_BUILD_PARALLEL_LEVEL = "24", FLASHINFER_CUDA_ARCH_LIST = "9.0a 10.0a" } }
 no-build-isolation-package = [
   "transformer-engine-torch",
   "transformer-engine",
@@ -208,6 +231,7 @@ no-build-isolation-package = [
   "deep_gemm",
   "deep_ep",
   "nv-grouped-gemm",          # from mlm (added here to make sure it's built no isolation since mlm workspace uses setup.py)
+  "sgl-kernel",
 ]
 # Always apply the build group since dependencies like TE/mcore/nemo-run require build dependencies
 # and this lets us assume they are implicitly installed with a simply `uv sync`. Ideally, we'd
@@ -227,6 +251,13 @@ override-dependencies = [
   "opencv-python-headless>=4.11.0",
   "timm<=1.0.22",
   "nvidia-modelopt[torch]>=0.39.0",
+  # sglang requires torch 2.9.1, but we need 2.9.0
+  "torch==2.9.0",
+  "torchaudio==2.9.0",
+  # sglang and vllm require conflicting llguidance versions, so enforce vllm's range since it's newer
+  "llguidance>=1.3.0,<1.4.0",
+  # Override setuptools range in other dependencies to address CVE GHSA-58pv-8j8x-9vj2
+  "setuptools>=80.10.2",
 ]
 # CVE fixes
 constraint-dependencies = [
@@ -234,8 +265,30 @@ constraint-dependencies = [
   "starlette>=0.49.1", # Address CVE GHSA-7f5h-v6xp-fcq8
   "urllib3>=2.6.3",    # Address CVE GHSA-38jv-5279-wg99
   "aiohttp>=3.13.3",   # Address CVE GHSA-mqqc-3gqh-h2x8
+  "pyasn1>=0.6.2",     # Address CVE GHSA-63vm-454h-vhhq
+  "wheel>=0.46.2",     # Address CVE GHSA-8rrh-rw8j-w5fx
+]
+
+conflicts = [
+  [
+    { extra = "fsdp" },
+    { extra = "sglang" },
+  ],
+  [
+    { extra = "automodel" },
+    { extra = "sglang" },
+  ],
+  [
+    { extra = "mcore" },
+    { extra = "sglang" },
+  ],
+  [
+    { extra = "vllm" },
+    { extra = "sglang" },
+  ],
 ]
 
+
 # Augment build dependencies for packages that need torch at build time
 [tool.uv.extra-build-dependencies]
 flash-attn = [{ requirement = "torch", match-runtime = true }]
@@ -247,6 +300,7 @@ transformer-engine-torch = [{ requirement = "torch", match-runtime = true }]
 mamba-ssm = [{ requirement = "torch", match-runtime = true }]
 causal-conv1d = [{ requirement = "torch", match-runtime = true }]
 nv-grouped-gemm = [{ requirement = "torch", match-runtime = true }]
+sgl-kernel = [{ requirement = "torch", match-runtime = true }]
 
 # Needed when building from source
 [[tool.uv.dependency-metadata]]
@@ -283,6 +337,12 @@ name = "nv-grouped-gemm"
 version = "v1.1.4.post7"
 requires-dist = ["setuptools", "wheel", "torch", "numpy"]
 
+[[tool.uv.dependency-metadata]]
+name = "sgl-kernel"
+# This version has to match the version in the commit/rev/tag used
+version = "0.3.20"
+requires-dist = ["torch", "scikit-build-core", "wheel"]
+
 [tool.black]
 line-length = 120
 include = '\.pyi?$'
@@ -304,6 +364,7 @@ markers = [
   "hf_gated: marks tests that require HuggingFace token access for gated models",
   "automodel: marks tests that require the automodel extra",
   "vllm: marks tests that require the vllm extra",
+  "sglang: marks tests that require the sglang extra",
 ]
 
 [tool.pyrefly]
diff --git a/pyrefly.toml b/pyrefly.toml
index e1127eadd2..32e67b658a 100644
--- a/pyrefly.toml
+++ b/pyrefly.toml
@@ -38,8 +38,8 @@ project-includes = [
   "examples/custom_parallel/llama_nemotron_super_49b_custom_plan.py",
   "nemo_rl/algorithms/__init__.py",
   "nemo_rl/algorithms/interfaces.py",
-  "nemo_rl/algorithms/utils.py",
   "nemo_rl/algorithms/reward_functions.py",
+  "nemo_rl/algorithms/utils.py",
   "nemo_rl/data/__init__.py",
   "nemo_rl/data/chat_templates.py",
   "nemo_rl/data/collate_fn.py",
@@ -59,13 +59,15 @@ project-includes = [
   "nemo_rl/data/datasets/processed_dataset.py",
   "nemo_rl/data/datasets/raw_dataset.py",
   "nemo_rl/data/datasets/response_datasets/__init__.py",
+  "nemo_rl/data/datasets/response_datasets/aime24.py",
   "nemo_rl/data/datasets/response_datasets/clevr.py",
+  "nemo_rl/data/datasets/response_datasets/dapo_math.py",
   "nemo_rl/data/datasets/response_datasets/deepscaler.py",
   "nemo_rl/data/datasets/response_datasets/geometry3k.py",
+  "nemo_rl/data/datasets/response_datasets/helpsteer3.py",
   "nemo_rl/data/datasets/response_datasets/oai_format_dataset.py",
   "nemo_rl/data/datasets/response_datasets/oasst.py",
   "nemo_rl/data/datasets/response_datasets/openmathinstruct2.py",
-  "nemo_rl/data/datasets/response_datasets/helpsteer3.py",
   "nemo_rl/data/datasets/response_datasets/refcoco.py",
   "nemo_rl/data/datasets/response_datasets/response_dataset.py",
   "nemo_rl/data/datasets/response_datasets/squad.py",
@@ -82,8 +84,8 @@ project-includes = [
   "nemo_rl/distributed/virtual_cluster.py",
   "nemo_rl/distributed/worker_group_utils.py",
   "nemo_rl/environments/__init__.py",
-  "nemo_rl/environments/games/sliding_puzzle.py",
   "nemo_rl/environments/code_jaccard_environment.py",
+  "nemo_rl/environments/games/sliding_puzzle.py",
   "nemo_rl/environments/interfaces.py",
   "nemo_rl/environments/math_environment.py",
   "nemo_rl/environments/metrics.py",
@@ -95,6 +97,7 @@ project-includes = [
   "nemo_rl/experience/__init__.py",
   "nemo_rl/experience/rollouts.py",
   "nemo_rl/models/__init__.py",
+  "nemo_rl/models/automodel/__init__.py",
   "nemo_rl/models/dtensor/__init__.py",
   "nemo_rl/models/dtensor/parallelize.py",
   "nemo_rl/models/generation/__init__.py",
@@ -104,6 +107,8 @@ project-includes = [
   "nemo_rl/models/generation/vllm/quantization/fp8_train_utils.py",
   "nemo_rl/models/generation/vllm/utils.py",
   "nemo_rl/models/generation/vllm/vllm_backend.py",
+  "nemo_rl/models/generation/sglang/__init__.py",
+  "nemo_rl/models/generation/sglang/config.py",
   "nemo_rl/models/huggingface/__init__.py",
   "nemo_rl/models/megatron/__init__.py",
   "nemo_rl/models/megatron/community_import.py",
diff --git a/tests/functional/L1_Functional_Tests_GPU.sh b/tests/functional/L1_Functional_Tests_GPU.sh
index ec7527f583..095a01c447 100644
--- a/tests/functional/L1_Functional_Tests_GPU.sh
+++ b/tests/functional/L1_Functional_Tests_GPU.sh
@@ -31,6 +31,7 @@ time uv run --no-sync bash ./tests/functional/grpo_megatron.sh
 time uv run --no-sync bash ./tests/functional/grpo_megatron_generation.sh
 time uv run --no-sync bash ./tests/functional/grpo_multiturn.sh
 time uv run --no-sync bash ./tests/functional/grpo_non_colocated.sh
+time uv run --no-sync bash ./tests/functional/grpo_sglang.sh
 time uv run --no-sync bash ./tests/functional/dpo.sh
 time uv run --no-sync bash ./tests/functional/rm.sh
 time uv run --no-sync bash ./tests/functional/eval.sh
diff --git a/tests/functional/distillation.sh b/tests/functional/distillation.sh
index 19cb71252c..195e3fc3a5 100644
--- a/tests/functional/distillation.sh
+++ b/tests/functional/distillation.sh
@@ -37,7 +37,9 @@ uv run coverage run -a --data-file=$PROJECT_ROOT/tests/.coverage --source=$PROJE
     distillation.max_val_samples=16 \
     distillation.val_batch_size=8 \
     distillation.val_period=3 \
-    data.dataset_name=OpenMathInstruct-2 \
+    data.train.dataset_name=OpenMathInstruct-2 \
+    ++data.train.split_validation_size=0.05 \
+    data.validation=null \
     loss_fn.zero_outside_topk=true \
     logger.tensorboard_enabled=true \
     logger.log_dir=$LOG_DIR \
diff --git a/tests/functional/distillation_megatron.sh b/tests/functional/distillation_megatron.sh
index b56ea672fb..d40516d939 100644
--- a/tests/functional/distillation_megatron.sh
+++ b/tests/functional/distillation_megatron.sh
@@ -40,7 +40,9 @@ uv run coverage run -a --data-file=$PROJECT_ROOT/tests/.coverage --source=$PROJE
     distillation.max_val_samples=16 \
     distillation.val_batch_size=8 \
     distillation.val_period=3 \
-    data.dataset_name=OpenMathInstruct-2 \
+    data.train.dataset_name=OpenMathInstruct-2 \
+    ++data.train.split_validation_size=0.05 \
+    data.validation=null \
     loss_fn.zero_outside_topk=false \
     logger.tensorboard_enabled=true \
     logger.log_dir=$LOG_DIR \
diff --git a/tests/functional/grpo_sglang.sh b/tests/functional/grpo_sglang.sh
new file mode 100755
index 0000000000..f17aef1a7f
--- /dev/null
+++ b/tests/functional/grpo_sglang.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+# Functional test: 2-step GRPO math run driven by examples/configs/grpo_math_1B_sglang.yaml,
+# followed by a threshold check on train/token_mult_prob_error.
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+PROJECT_ROOT=$(realpath "$SCRIPT_DIR/../..")
+# Mark the current repo as safe, since wandb fetches metadata about the repo
+git config --global --add safe.directory "$PROJECT_ROOT"
+
+set -eou pipefail
+
+EXP_NAME=$(basename "$0" .sh)
+EXP_DIR=$SCRIPT_DIR/$EXP_NAME
+LOG_DIR=$EXP_DIR/logs
+JSON_METRICS=$EXP_DIR/metrics.json
+RUN_LOG=$EXP_DIR/run.log
+export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-}
+
+# Start from a clean experiment directory
+rm -rf "$EXP_DIR" "$LOG_DIR"
+mkdir -p "$EXP_DIR" "$LOG_DIR"
+
+cd "$PROJECT_ROOT"
+# "$@" forwards extra CLI overrides to the run script without re-splitting them
+uv run --group test coverage run -a --data-file="$PROJECT_ROOT/tests/.coverage" --source="$PROJECT_ROOT/nemo_rl" \
+    "$PROJECT_ROOT/examples/run_grpo_math.py" \
+    --config "$PROJECT_ROOT/examples/configs/grpo_math_1B_sglang.yaml" \
+    policy.model_name=Qwen/Qwen3-0.6B \
+    grpo.num_prompts_per_step=2 \
+    grpo.num_generations_per_prompt=4 \
+    policy.train_global_batch_size=4 \
+    policy.train_micro_batch_size=1 \
+    cluster.gpus_per_node=1 \
+    policy.generation.sglang_cfg.gpus_per_server=1 \
+    grpo.max_num_steps=2 \
+    logger.tensorboard_enabled=true \
+    logger.log_dir="$LOG_DIR" \
+    logger.wandb_enabled=false \
+    logger.monitor_gpus=true \
+    checkpointing.enabled=false \
+    "$@" \
+    2>&1 | tee "$RUN_LOG"
+
+uv run tests/json_dump_tb_logs.py "$LOG_DIR" --output_path "$JSON_METRICS"
+
+uv run tests/check_metrics.py "$JSON_METRICS" \
+    'max(data["train/token_mult_prob_error"]) < 1.05'
+
diff --git a/tests/test_suites/README.md b/tests/test_suites/README.md
index e13b330c05..3d3ca9b5b4 100644
--- a/tests/test_suites/README.md
+++ b/tests/test_suites/README.md
@@ -1,5 +1,16 @@
 # Recipes
 
+## Test Suites
+
+Test suites are defined in `.txt` files that list the test scripts to run:
+
+- `nightly.txt` - H100 tests for nightly CI (8 GPUs per node)
+- `release.txt` - H100 tests for release CI (8 GPUs per node)
+- `nightly_gb200.txt` - GB200 tests for nightly CI (4 GPUs per node)
+- `release_gb200.txt` - GB200 tests for release CI (4 GPUs per node)
+- `performance_h100.txt` - Performance benchmarks for H100 (8 GPUs per node)
+- `performance_gb200.txt` - Performance benchmarks for GB200 (4 GPUs per node)
+
 ## Naming
 
 Base pattern (LLM):
@@ -58,6 +69,26 @@ ls -lh llm/sft-llama3.2-1b-1n8g-fsdp2tp1/
 # -rw-r--r-- 1 terryk dip  94K Apr 23 18:23 run.log
 ```
 
+## GB200 Variants
+
+For GB200 systems with 4 GPUs per node, test scripts should include `GPUS_PER_NODE=4` in the CONFIG section. This ensures the launch script uses the correct GPU count for slurm allocation and GPU hour calculations:
+
+```sh
+# ===== BEGIN CONFIG =====
+NUM_NODES=1
+GPUS_PER_NODE=4    # 4 for GB200, 8 for H100 (default)
+STEPS_PER_RUN=450
+MAX_STEPS=450
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))
+NUM_MINUTES=120
+# ===== END CONFIG =====
+```
+
+GB200 YAML configs should inherit from their 8g counterparts and override:
+- `cluster.gpus_per_node: 4`
+- Any parallelism settings that need to change (e.g., halving `tensor_parallel_size`)
+- Directory/name references updated to reflect the 4g naming
+
 ## Launching with code snapshots
 
 We provide a convenience script that will create a code snapshot and launch `NUM_RUNS` number of slurm jobs (`NUM_RUNS` is defined in the script itself). We create a code snapshot to
diff --git a/tests/test_suites/llm/dapo-qwen2.5-7b-16n4g-fsdp2cp2.sh b/tests/test_suites/llm/dapo-qwen2.5-7b-16n4g-fsdp2cp2.sh
new file mode 100755
index 0000000000..eaccb1864b
--- /dev/null
+++ b/tests/test_suites/llm/dapo-qwen2.5-7b-16n4g-fsdp2cp2.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=16
+GPUS_PER_NODE=4
+STEPS_PER_RUN=6
+MAX_STEPS=20
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=240
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment; "$@" forwards extra CLI overrides without re-splitting them
+cd "$PROJECT_ROOT"
+uv run examples/run_grpo_math.py \
+    --config "$CONFIG_PATH" \
+    grpo.max_num_steps="$MAX_STEPS" \
+    logger.log_dir="$LOG_DIR" \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name="$EXP_NAME" \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir="$CKPT_DIR" \
+    "$@" \
+    2>&1 | tee "$RUN_LOG"
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'median(data["train/token_mult_prob_error"]) < 1.1' \
+        'data["train/token_mult_prob_error"]["20"] < 1.05' \
+        'data["train/reward"]["20"] > -0.45' \
+        'data["train/filtered_reward"]["20"] > -0.2'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/distillation-qwen3-32b-to-1.7b-base-1n4g-fsdp2tp1.v1.sh b/tests/test_suites/llm/distillation-qwen3-32b-to-1.7b-base-1n4g-fsdp2tp1.v1.sh
new file mode 100755
index 0000000000..1cce37ba8f
--- /dev/null
+++ b/tests/test_suites/llm/distillation-qwen3-32b-to-1.7b-base-1n4g-fsdp2tp1.v1.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=1
+GPUS_PER_NODE=4
+STEPS_PER_RUN=10
+MAX_STEPS=10
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=60
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment; "$@" forwards extra CLI overrides without re-splitting them
+cd "$PROJECT_ROOT"
+uv run examples/run_distillation_math.py \
+    --config "$CONFIG_PATH" \
+    distillation.max_num_steps="$MAX_STEPS" \
+    distillation.val_period=20 \
+    logger.log_dir="$LOG_DIR" \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name="$EXP_NAME" \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir="$CKPT_DIR" \
+    "$@" \
+    2>&1 | tee "$RUN_LOG"
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'data["train/loss"]["1"] < 1.5' \
+        'data["train/loss"]["10"] < 0.5' \
+        'max(data["ray/node.0.gpu.0.mem_gb"]) < 70' \
+        'mean(data["timing/train/total_step_time"], -6, -1) < 500'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/distillation-qwen3-32b-to-1.7b-base-1n4g-megatron-tp1pp2cp2-pack.sh b/tests/test_suites/llm/distillation-qwen3-32b-to-1.7b-base-1n4g-megatron-tp1pp2cp2-pack.sh
new file mode 100755
index 0000000000..2a430cfa88
--- /dev/null
+++ b/tests/test_suites/llm/distillation-qwen3-32b-to-1.7b-base-1n4g-megatron-tp1pp2cp2-pack.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=1
+GPUS_PER_NODE=4
+STEPS_PER_RUN=10
+MAX_STEPS=10
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=60
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment; "$@" forwards extra CLI overrides without re-splitting them
+cd "$PROJECT_ROOT"
+uv run examples/run_distillation_math.py \
+    --config "$CONFIG_PATH" \
+    distillation.max_num_steps="$MAX_STEPS" \
+    distillation.val_period=20 \
+    logger.log_dir="$LOG_DIR" \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name="$EXP_NAME" \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir="$CKPT_DIR" \
+    "$@" \
+    2>&1 | tee "$RUN_LOG"
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'data["train/loss"]["1"] < 1.5' \
+        'data["train/loss"]["10"] < 0.5' \
+        'max(data["ray/node.0.gpu.0.mem_gb"]) < 75' \
+        'mean(data["timing/train/total_step_time"], -6, -1) < 500'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n4g-fsdp2tp1-long.v1.sh b/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n4g-fsdp2tp1-long.v1.sh
new file mode 100755
index 0000000000..a98bab6e6f
--- /dev/null
+++ b/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n4g-fsdp2tp1-long.v1.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=2
+GPUS_PER_NODE=4
+STEPS_PER_RUN=30
+MAX_STEPS=100
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=140
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment; "$@" forwards extra CLI overrides without re-splitting them
+cd "$PROJECT_ROOT"
+uv run examples/run_distillation_math.py \
+    --config "$CONFIG_PATH" \
+    distillation.max_num_steps="$MAX_STEPS" \
+    logger.log_dir="$LOG_DIR" \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name="$EXP_NAME" \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir="$CKPT_DIR" \
+    "$@" \
+    2>&1 | tee "$RUN_LOG"
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'data["train/loss"]["1"] < 1.5' \
+        'data["train/loss"]["100"] < 0.25' \
+        'data["validation/accuracy"]["100"] > 0.2' \
+        'mean(data["timing/train/total_step_time"], -6, -1) < 1600'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n4g-fsdp2tp1-quick.v2.sh b/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n4g-fsdp2tp1-quick.v2.sh
new file mode 100755
index 0000000000..cbcb22f875
--- /dev/null
+++ b/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n4g-fsdp2tp1-quick.v2.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=4
+GPUS_PER_NODE=4
+STEPS_PER_RUN=20
+MAX_STEPS=20
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=30
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment; "$@" forwards extra CLI overrides without re-splitting them
+cd "$PROJECT_ROOT"
+uv run examples/run_dpo.py \
+    --config "$CONFIG_PATH" \
+    dpo.max_num_steps="$MAX_STEPS" \
+    logger.log_dir="$LOG_DIR" \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name="$EXP_NAME" \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir="$CKPT_DIR" \
+    "$@" \
+    2>&1 | tee "$RUN_LOG"
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'data["train/loss"]["1"] < 3.6' \
+        'data["train/loss"]["20"] < 3.4' \
+        'data["train/preference_loss"]["1"] > 0.69314' \
+        'data["train/preference_loss"]["1"] < 0.69316' \
+        'data["train/preference_loss"]["20"] < 0.6' \
+        'mean(data["timing/train/total_step_time"], -10, -1) < 7.8'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n4g-megatrontp1pp2-quick.sh b/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n4g-megatrontp1pp2-quick.sh
new file mode 100755
index 0000000000..3db1458c8b
--- /dev/null
+++ b/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n4g-megatrontp1pp2-quick.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=4
+GPUS_PER_NODE=4
+STEPS_PER_RUN=20
+MAX_STEPS=20
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=30
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment from the project root; extra CLI args are forwarded verbatim via "$@".
+cd "$PROJECT_ROOT"
+uv run examples/run_dpo.py \
+    --config "$CONFIG_PATH" \
+    dpo.max_num_steps=$MAX_STEPS \
+    logger.log_dir="$LOG_DIR" \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name="$EXP_NAME" \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir="$CKPT_DIR" \
+    "$@" \
+    2>&1 | tee "$RUN_LOG"
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'data["train/loss"]["1"] < 3.6' \
+        'data["train/loss"]["20"] < 3.4' \
+        'data["train/preference_loss"]["1"] > 0.69314' \
+        'data["train/preference_loss"]["1"] < 0.69316' \
+        'data["train/preference_loss"]["20"] < 0.6' \
+        'mean(data["timing/train/total_step_time"], -10) < 6.7'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/dpo-llama3.2-1b-instruct-1n4g-fsdp2tp1.v2.sh b/tests/test_suites/llm/dpo-llama3.2-1b-instruct-1n4g-fsdp2tp1.v2.sh
new file mode 100755
index 0000000000..7c6821bfb2
--- /dev/null
+++ b/tests/test_suites/llm/dpo-llama3.2-1b-instruct-1n4g-fsdp2tp1.v2.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=1
+GPUS_PER_NODE=4
+STEPS_PER_RUN=150
+MAX_STEPS=150
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=45
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment from the project root; extra CLI args are forwarded verbatim via "$@".
+cd "$PROJECT_ROOT"
+uv run examples/run_dpo.py \
+    --config "$CONFIG_PATH" \
+    dpo.max_num_steps=$MAX_STEPS \
+    logger.log_dir="$LOG_DIR" \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name="$EXP_NAME" \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir="$CKPT_DIR" \
+    "$@" \
+    2>&1 | tee "$RUN_LOG"
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'data["train/loss"]["1"] > 0.69314' \
+        'data["train/loss"]["1"] < 0.69316' \
+        'data["train/loss"]["150"] < 0.55' \
+        'mean(data["timing/train/total_step_time"], -11, -1) < 1.3'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/grpo-dapomath17k-dsv3-32n4g-megatron.sh b/tests/test_suites/llm/grpo-dapomath17k-dsv3-32n4g-megatron.sh
new file mode 100755
index 0000000000..49ba5230de
--- /dev/null
+++ b/tests/test_suites/llm/grpo-dapomath17k-dsv3-32n4g-megatron.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=32
+GPUS_PER_NODE=4
+STEPS_PER_RUN=60
+MAX_STEPS=60
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=240
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Use the DeepSeek-V3 checkpoint converted to BF16.
+if [[ -z "$NRL_DEEPSEEK_V3_BF16_CKPT" ]]; then
+    echo "Need to set NRL_DEEPSEEK_V3_BF16_CKPT to the path of DeepSeek-V3 checkpoint converted to BF16. See docs/guides/deepseek.md for more details."
+    exit 1
+fi
+
+# Run the experiment from the project root; the model path comes from NRL_DEEPSEEK_V3_BF16_CKPT and is quoted so it survives spaces.
+cd "$PROJECT_ROOT"
+uv run examples/run_grpo_math.py \
+    --config "$CONFIG_PATH" \
+    policy.model_name="$NRL_DEEPSEEK_V3_BF16_CKPT" \
+    grpo.max_num_steps=$MAX_STEPS \
+    logger.log_dir="$LOG_DIR" \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name="$EXP_NAME" \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir="$CKPT_DIR" \
+    "$@" \
+    2>&1 | tee "$RUN_LOG"
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'mean(data["train/gen_kl_error"]) < 0.002' \
+        'data["train/reward"]["60"] > 0.60' \
+        'mean(data["timing/train/total_step_time"], -6, -1) < 210'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/grpo-deepscaler-1.5b-1n4g-8K.sh b/tests/test_suites/llm/grpo-deepscaler-1.5b-1n4g-8K.sh
new file mode 100755
index 0000000000..ad7c363bfa
--- /dev/null
+++ b/tests/test_suites/llm/grpo-deepscaler-1.5b-1n4g-8K.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=1
+GPUS_PER_NODE=4
+STEPS_PER_RUN=20
+MAX_STEPS=40
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=150
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment from the project root; extra CLI args are forwarded verbatim via "$@".
+cd "$PROJECT_ROOT"
+uv run examples/run_grpo_math.py \
+    --config "$CONFIG_PATH" \
+    grpo.max_num_steps=$MAX_STEPS \
+    logger.log_dir="$LOG_DIR" \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name="$EXP_NAME" \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir="$CKPT_DIR" \
+    "$@" \
+    2>&1 | tee "$RUN_LOG"
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'median(data["train/token_mult_prob_error"]) < 1.05' \
+        "data['train/token_mult_prob_error']['$MAX_STEPS'] < 1.05"
+fi
+
+# Convert 8k checkpoint
+uv run examples/converters/convert_dcp_to_hf.py \
+  --config=$CKPT_DIR/step_${MAX_STEPS}/config.yaml \
+  --dcp-ckpt-path=$CKPT_DIR/step_${MAX_STEPS}/policy/weights \
+  --hf-ckpt-path=$CKPT_DIR/grpo-deepscaler-8k-${MAX_STEPS}-hf
+
+# Run eval
+uv run examples/run_eval.py \
+    generation.model_name=$CKPT_DIR/grpo-deepscaler-8k-${MAX_STEPS}-hf \
+    data.prompt_file=examples/prompts/cot.txt \
+    generation.vllm_cfg.max_model_len=32768 \
+    generation.vllm_cfg.enforce_eager=True \
+    generation.temperature=1.0 \
+    eval.num_tests_per_prompt=16 \
+    2>&1 | tee ${RUN_LOG}.aime-8k
+
+cat ${RUN_LOG}.aime-8k       | grep "score=" | sed 's/.*score=\([^ ]*\).*/{"score": \1}/' > ${RUN_LOG}-8k-metric.json
+
+# 0.2 is the baseline score for AIME on the base checkpoint
+uv run tests/check_metrics.py ${RUN_LOG}-8k-metric.json \
+  'data["score"] >= 0.2396'
+
+# Clean up checkpoint directory after successful run to save space.
+rm -rf "$CKPT_DIR"
+
diff --git a/tests/test_suites/llm/grpo-gemma3-1b-it-1n4g-fsdp2tp1.sh b/tests/test_suites/llm/grpo-gemma3-1b-it-1n4g-fsdp2tp1.sh
new file mode 100755
index 0000000000..f7302b01f4
--- /dev/null
+++ b/tests/test_suites/llm/grpo-gemma3-1b-it-1n4g-fsdp2tp1.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=1
+GPUS_PER_NODE=4
+STEPS_PER_RUN=400
+MAX_STEPS=400
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=120
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment from the project root; extra CLI args are forwarded verbatim via "$@".
+cd "$PROJECT_ROOT"
+uv run examples/run_grpo_math.py \
+    --config "$CONFIG_PATH" \
+    grpo.max_num_steps=$MAX_STEPS \
+    logger.log_dir="$LOG_DIR" \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name="$EXP_NAME" \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir="$CKPT_DIR" \
+    "$@" \
+    2>&1 | tee "$RUN_LOG"
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'median(data["train/token_mult_prob_error"]) < 1.1' \
+        "data[\"train/token_mult_prob_error\"][\"${MAX_STEPS}\"] < 1.1" \
+        'mean(data["timing/train/total_step_time"], -6, -1) < 14'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/grpo-gemma3-27b-it-8n4g-fsdp2tp4-actckpt-long.sh b/tests/test_suites/llm/grpo-gemma3-27b-it-8n4g-fsdp2tp4-actckpt-long.sh
new file mode 100755
index 0000000000..766d143c65
--- /dev/null
+++ b/tests/test_suites/llm/grpo-gemma3-27b-it-8n4g-fsdp2tp4-actckpt-long.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=8
+GPUS_PER_NODE=4
+STEPS_PER_RUN=10
+MAX_STEPS=20
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=240
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment from the project root; extra CLI args are forwarded verbatim via "$@".
+cd "$PROJECT_ROOT"
+uv run examples/run_grpo_math.py \
+    --config "$CONFIG_PATH" \
+    grpo.max_num_steps=$MAX_STEPS \
+    logger.log_dir="$LOG_DIR" \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name="$EXP_NAME" \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir="$CKPT_DIR" \
+    "$@" \
+    2>&1 | tee "$RUN_LOG"
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'median(data["train/token_mult_prob_error"]) < 1.1' \
+        'data["train/token_mult_prob_error"]["20"] < 1.1'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/grpo-gptoss-20b-8n4g-megatron.sh b/tests/test_suites/llm/grpo-gptoss-20b-8n4g-megatron.sh
new file mode 100755
index 0000000000..9b4e3ab439
--- /dev/null
+++ b/tests/test_suites/llm/grpo-gptoss-20b-8n4g-megatron.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=8
+GPUS_PER_NODE=4
+STEPS_PER_RUN=30
+MAX_STEPS=60
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=155
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment from the project root; path-valued overrides are quoted so they survive spaces.
+cd "$PROJECT_ROOT"
+uv run examples/run_grpo_math.py \
+    --config "$CONFIG_PATH" \
+    grpo.max_num_steps=$MAX_STEPS \
+    logger.log_dir="$LOG_DIR" \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name="$EXP_NAME" \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir="$CKPT_DIR" \
+    "$@" \
+    2>&1 | tee "$RUN_LOG"
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'mean(data["train/gen_kl_error"]) < 0.002' \
+        'data["train/reward"]["60"] > 0.60' \
+        'mean(data["timing/train/total_step_time"], -6, -1) < 210'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/grpo-llama3.1-8b-instruct-2n4g-fsdp2tp1-noncolocated.sh b/tests/test_suites/llm/grpo-llama3.1-8b-instruct-2n4g-fsdp2tp1-noncolocated.sh
new file mode 100755
index 0000000000..5957ceefe1
--- /dev/null
+++ b/tests/test_suites/llm/grpo-llama3.1-8b-instruct-2n4g-fsdp2tp1-noncolocated.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=2
+GPUS_PER_NODE=4
+STEPS_PER_RUN=30
+MAX_STEPS=30
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=120
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment from the project root; extra CLI args are forwarded verbatim via "$@".
+cd "$PROJECT_ROOT"
+uv run examples/run_grpo_math.py \
+    --config "$CONFIG_PATH" \
+    grpo.max_num_steps=$MAX_STEPS \
+    logger.log_dir="$LOG_DIR" \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name="$EXP_NAME" \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir="$CKPT_DIR" \
+    "$@" \
+    2>&1 | tee "$RUN_LOG"
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'median(data["train/token_mult_prob_error"]) < 1.1' \
+        'data["train/token_mult_prob_error"]["30"] < 1.1'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/grpo-llama3.1-8b-instruct-4n4g-fsdp2tp1-long.v3.sh b/tests/test_suites/llm/grpo-llama3.1-8b-instruct-4n4g-fsdp2tp1-long.v3.sh
new file mode 100755
index 0000000000..1cf423aa15
--- /dev/null
+++ b/tests/test_suites/llm/grpo-llama3.1-8b-instruct-4n4g-fsdp2tp1-long.v3.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=4
+GPUS_PER_NODE=4
+STEPS_PER_RUN=100
+MAX_STEPS=500
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=240
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment from the project root; extra CLI args are forwarded verbatim via "$@".
+cd "$PROJECT_ROOT"
+uv run examples/run_grpo_math.py \
+    --config "$CONFIG_PATH" \
+    grpo.max_num_steps=$MAX_STEPS \
+    logger.log_dir="$LOG_DIR" \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name="$EXP_NAME" \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir="$CKPT_DIR" \
+    "$@" \
+    2>&1 | tee "$RUN_LOG"
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics once the highest logged train/loss step has reached MAX_STEPS; the per-step check targets that final step.
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' "$JSON_METRICS") -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py "$JSON_METRICS" \
+        'median(data["train/token_mult_prob_error"]) < 1.1' \
+        "data[\"train/token_mult_prob_error\"][\"${MAX_STEPS}\"] < 1.1"
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n4g-fsdp2tp1.v3.sh b/tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n4g-fsdp2tp1.v3.sh
new file mode 100755
index 0000000000..2d02130994
--- /dev/null
+++ b/tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n4g-fsdp2tp1.v3.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=1
+GPUS_PER_NODE=4
+STEPS_PER_RUN=500
+MAX_STEPS=500
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=150
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment from the project root; extra CLI args are forwarded verbatim via "$@".
+cd "$PROJECT_ROOT"
+uv run examples/run_grpo_math.py \
+    --config "$CONFIG_PATH" \
+    grpo.max_num_steps=$MAX_STEPS \
+    logger.log_dir="$LOG_DIR" \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name="$EXP_NAME" \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir="$CKPT_DIR" \
+    "$@" \
+    2>&1 | tee "$RUN_LOG"
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'median(data["train/token_mult_prob_error"]) < 1.1' \
+        'data["train/token_mult_prob_error"]["500"] < 1.1' \
+        'mean(data["timing/train/total_step_time"], -6, -1) < 10'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n4g-megatron.sh b/tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n4g-megatron.sh
new file mode 100755
index 0000000000..6bfec4ee44
--- /dev/null
+++ b/tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n4g-megatron.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=1
+GPUS_PER_NODE=4
+STEPS_PER_RUN=500
+MAX_STEPS=500
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=180
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment from the project root; extra CLI args are forwarded verbatim via "$@".
+cd "$PROJECT_ROOT"
+uv run examples/run_grpo_math.py \
+    --config "$CONFIG_PATH" \
+    grpo.max_num_steps=$MAX_STEPS \
+    logger.log_dir="$LOG_DIR" \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name="$EXP_NAME" \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir="$CKPT_DIR" \
+    "$@" \
+    2>&1 | tee "$RUN_LOG"
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'median(data["train/token_mult_prob_error"]) < 1.1' \
+        'data["train/token_mult_prob_error"]["500"] < 1.1' \
+        'data["train/reward"]["500"] > 0.1' \
+        'mean(data["timing/train/total_step_time"], -6, -1) < 10.5'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n4g-megatron_generation.sh b/tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n4g-megatron_generation.sh
new file mode 100755
index 0000000000..abd7cb6973
--- /dev/null
+++ b/tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n4g-megatron_generation.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=1
+GPUS_PER_NODE=4
+STEPS_PER_RUN=500
+MAX_STEPS=500
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=240
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment from the project root with the megatron generation backend; extra CLI args are forwarded verbatim via "$@".
+cd "$PROJECT_ROOT"
+uv run examples/run_grpo_math.py \
+    --config "$CONFIG_PATH" \
+    grpo.max_num_steps=$MAX_STEPS \
+    policy.generation.backend=megatron \
+    logger.log_dir="$LOG_DIR" \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name="$EXP_NAME" \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir="$CKPT_DIR" \
+    "$@" \
+    2>&1 | tee "$RUN_LOG"
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'median(data["train/token_mult_prob_error"]) < 1.1' \
+        'data["train/token_mult_prob_error"]["500"] < 1.1' \
+        'data["train/reward"]["500"] > 0.1' \
+        'mean(data["timing/train/total_step_time"], -6, -1) < 10.5'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/grpo-moonlight-16ba3b-4n4g-megatron.sh b/tests/test_suites/llm/grpo-moonlight-16ba3b-4n4g-megatron.sh
new file mode 100755
index 0000000000..b063153507
--- /dev/null
+++ b/tests/test_suites/llm/grpo-moonlight-16ba3b-4n4g-megatron.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=4
+GPUS_PER_NODE=4
+STEPS_PER_RUN=30
+MAX_STEPS=30
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=60
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment
+cd $PROJECT_ROOT
+PYTHONPATH=$HF_HOME/modules:$PYTHONPATH uv run examples/run_grpo_math.py \
+    --config $CONFIG_PATH \
+    grpo.max_num_steps=$MAX_STEPS \
+    logger.log_dir=$LOG_DIR \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name=$EXP_NAME \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir=$CKPT_DIR \
+    $@ \
+    2>&1 | tee $RUN_LOG
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'median(data["train/token_mult_prob_error"]) < 1.1' \
+        'data["train/token_mult_prob_error"]["30"] < 1.1' \
+        'mean(data["train/reward"]) > 0.45' \
+        'mean(data["timing/train/total_step_time"], -11, -1) < 70'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/grpo-nano-v2-12b-1n4g-megatron.sh b/tests/test_suites/llm/grpo-nano-v2-12b-1n4g-megatron.sh
new file mode 100755
index 0000000000..2ef4a4a6ce
--- /dev/null
+++ b/tests/test_suites/llm/grpo-nano-v2-12b-1n4g-megatron.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=1
+GPUS_PER_NODE=4
+STEPS_PER_RUN=30
+MAX_STEPS=30
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=60
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment from the project root; extra CLI args are forwarded verbatim via "$@".
+cd "$PROJECT_ROOT"
+uv run examples/run_grpo_math.py \
+    --config "$CONFIG_PATH" \
+    grpo.max_num_steps=$MAX_STEPS \
+    logger.log_dir="$LOG_DIR" \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name="$EXP_NAME" \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir="$CKPT_DIR" \
+    "$@" \
+    2>&1 | tee "$RUN_LOG"
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'median(data["train/token_mult_prob_error"]) < 1.05' \
+        'data["train/token_mult_prob_error"]["30"] < 1.05' \
+        'data["train/reward"]["30"] > 0.4' \
+        'mean(data["timing/train/total_step_time"], -6, -1) < 80'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/grpo-nano-v2-12b-2n4g-fsdp2tp1.sh b/tests/test_suites/llm/grpo-nano-v2-12b-2n4g-fsdp2tp1.sh
new file mode 100755
index 0000000000..3263e35bff
--- /dev/null
+++ b/tests/test_suites/llm/grpo-nano-v2-12b-2n4g-fsdp2tp1.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=2
+GPUS_PER_NODE=4
+STEPS_PER_RUN=30
+MAX_STEPS=30
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=60
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment from the project root; extra CLI args are forwarded verbatim via "$@".
+cd "$PROJECT_ROOT"
+uv run examples/run_grpo_math.py \
+    --config "$CONFIG_PATH" \
+    grpo.max_num_steps=$MAX_STEPS \
+    logger.log_dir="$LOG_DIR" \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name="$EXP_NAME" \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir="$CKPT_DIR" \
+    "$@" \
+    2>&1 | tee "$RUN_LOG"
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'median(data["train/token_mult_prob_error"]) < 1.05' \
+        'data["train/token_mult_prob_error"]["30"] < 1.05' \
+        'data["train/reward"]["30"] > 0.4' \
+        'mean(data["timing/train/total_step_time"], -6, -1) < 60'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/grpo-qwen2.5-32b-32n4g-fsdp2tp4-actckpt-long.v3.sh b/tests/test_suites/llm/grpo-qwen2.5-32b-32n4g-fsdp2tp4-actckpt-long.v3.sh
new file mode 100755
index 0000000000..f674602a23
--- /dev/null
+++ b/tests/test_suites/llm/grpo-qwen2.5-32b-32n4g-fsdp2tp4-actckpt-long.v3.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=32
+GPUS_PER_NODE=4
+STEPS_PER_RUN=20
+MAX_STEPS=20
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=120
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment from the project root; extra CLI args are forwarded verbatim via "$@".
+cd "$PROJECT_ROOT"
+uv run examples/run_grpo_math.py \
+    --config "$CONFIG_PATH" \
+    grpo.max_num_steps=$MAX_STEPS \
+    logger.log_dir="$LOG_DIR" \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name="$EXP_NAME" \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir="$CKPT_DIR" \
+    "$@" \
+    2>&1 | tee "$RUN_LOG"
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'median(data["train/token_mult_prob_error"]) < 1.1' \
+        'data["train/token_mult_prob_error"]["20"] < 1.1'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n4g-fsdp2tp2.v3.sh b/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n4g-fsdp2tp2.v3.sh
new file mode 100755
index 0000000000..edeb38d6b5
--- /dev/null
+++ b/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n4g-fsdp2tp2.v3.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=4
+GPUS_PER_NODE=4
+STEPS_PER_RUN=30
+MAX_STEPS=30
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=180
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment from the project root; extra CLI args are forwarded verbatim via "$@".
+cd "$PROJECT_ROOT"
+uv run examples/run_grpo_math.py \
+    --config "$CONFIG_PATH" \
+    grpo.max_num_steps=$MAX_STEPS \
+    logger.log_dir="$LOG_DIR" \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name="$EXP_NAME" \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir="$CKPT_DIR" \
+    "$@" \
+    2>&1 | tee "$RUN_LOG"
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'median(data["train/token_mult_prob_error"]) < 1.1' \
+        'data["train/token_mult_prob_error"]["30"] < 1.1' \
+        'data["train/grad_norm"]["30"] < 0.5' \
+        'data["train/grad_norm"]["30"] > 0.1'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n4g-megatron.sh b/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n4g-megatron.sh
new file mode 100755
index 0000000000..4a3a4e3954
--- /dev/null
+++ b/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n4g-megatron.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=4
+GPUS_PER_NODE=4
+STEPS_PER_RUN=30
+MAX_STEPS=30
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=40
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment
+cd $PROJECT_ROOT
+uv run examples/run_grpo_math.py \
+    --config $CONFIG_PATH \
+    grpo.max_num_steps=$MAX_STEPS \
+    logger.log_dir=$LOG_DIR \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name=$EXP_NAME \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir=$CKPT_DIR \
+    $@ \
+    2>&1 | tee $RUN_LOG
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Run the metric checks only if the highest logged train/loss step has reached MAX_STEPS
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'median(data["train/token_mult_prob_error"]) < 1.1' \
+        'data["train/token_mult_prob_error"]["30"] < 1.1' \
+        'mean(data["train/reward"]) > 0.56' \
+        'mean(data["timing/train/total_step_time"], 2) < 50'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/grpo-qwen2.5-math-1.5b-instruct-1n4g-fsdp2tp1.v3.sh b/tests/test_suites/llm/grpo-qwen2.5-math-1.5b-instruct-1n4g-fsdp2tp1.v3.sh
new file mode 100755
index 0000000000..02f1da0d62
--- /dev/null
+++ b/tests/test_suites/llm/grpo-qwen2.5-math-1.5b-instruct-1n4g-fsdp2tp1.v3.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=1
+GPUS_PER_NODE=4
+STEPS_PER_RUN=450
+MAX_STEPS=450
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=180
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment
+cd $PROJECT_ROOT
+uv run examples/run_grpo_math.py \
+    --config $CONFIG_PATH \
+    grpo.max_num_steps=$MAX_STEPS \
+    logger.log_dir=$LOG_DIR \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name=$EXP_NAME \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir=$CKPT_DIR \
+    $@ \
+    2>&1 | tee $RUN_LOG
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'median(data["train/token_mult_prob_error"]) < 1.1' \
+        'data["train/token_mult_prob_error"]["450"] < 1.1' \
+        'mean(data["timing/train/total_step_time"], 2) < 25'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1-sglang.sh b/tests/test_suites/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1-sglang.sh
new file mode 100755
index 0000000000..30f66ade8f
--- /dev/null
+++ b/tests/test_suites/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1-sglang.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=1
+STEPS_PER_RUN=450
+MAX_STEPS=450
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=120
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment
+cd $PROJECT_ROOT
+uv run examples/run_grpo_math.py \
+    --config $CONFIG_PATH \
+    grpo.max_num_steps=$MAX_STEPS \
+    logger.log_dir=$LOG_DIR \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name=$EXP_NAME \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir=$CKPT_DIR \
+    $@ \
+    2>&1 | tee $RUN_LOG
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+# Using the same metrics thresholds as the vllm version to verify alignment
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'mean(data["train/token_mult_prob_error"]) < 1.1' \
+        'data["train/token_mult_prob_error"]["450"] < 1.1' \
+        'mean(data["timing/train/total_step_time"], 2) < 25'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
+
diff --git a/tests/test_suites/llm/grpo-qwen3-0.6b-1n8g-sglang.sh b/tests/test_suites/llm/grpo-qwen3-0.6b-1n8g-sglang.sh
new file mode 100755
index 0000000000..8db4dc52f3
--- /dev/null
+++ b/tests/test_suites/llm/grpo-qwen3-0.6b-1n8g-sglang.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=1
+STEPS_PER_RUN=500
+MAX_STEPS=500
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=120
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment
+cd $PROJECT_ROOT
+uv run examples/run_grpo_math.py \
+    --config $CONFIG_PATH \
+    grpo.max_num_steps=$MAX_STEPS \
+    logger.log_dir=$LOG_DIR \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name=$EXP_NAME \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir=$CKPT_DIR \
+    $@ \
+    2>&1 | tee $RUN_LOG
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'mean(data["train/token_mult_prob_error"]) < 1.1' \
+        'data["train/token_mult_prob_error"]["500"] < 1.1' \
+        'mean(data["timing/train/total_step_time"], 2) < 30'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/grpo-qwen3-30ba3b-8n4g-megatron.sh b/tests/test_suites/llm/grpo-qwen3-30ba3b-8n4g-megatron.sh
new file mode 100755
index 0000000000..3d97d41420
--- /dev/null
+++ b/tests/test_suites/llm/grpo-qwen3-30ba3b-8n4g-megatron.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=8
+GPUS_PER_NODE=4
+STEPS_PER_RUN=30
+MAX_STEPS=30
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=240
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment
+cd $PROJECT_ROOT
+uv run examples/run_grpo_math.py \
+    --config $CONFIG_PATH \
+    grpo.max_num_steps=$MAX_STEPS \
+    logger.log_dir=$LOG_DIR \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name=$EXP_NAME \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir=$CKPT_DIR \
+    $@ \
+    2>&1 | tee $RUN_LOG
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'median(data["train/token_mult_prob_error"]) < 1.1' \
+        'data["train/reward"]["30"] > 0.43' \
+        'mean(data["timing/train/total_step_time"], -6, -1) < 220'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/performance/grpo-qwen3-30ba3b-4n8g-40K.sh b/tests/test_suites/llm/performance/grpo-qwen3-30ba3b-4n8g-40K.sh
new file mode 100755
index 0000000000..0de5a124ed
--- /dev/null
+++ b/tests/test_suites/llm/performance/grpo-qwen3-30ba3b-4n8g-40K.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=4
+STEPS_PER_RUN=10
+MAX_STEPS=10
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=100
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment
+cd $PROJECT_ROOT
+uv run examples/run_grpo_math.py \
+    --config $CONFIG_PATH \
+    grpo.max_num_steps=$MAX_STEPS \
+    logger.log_dir=$LOG_DIR \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name=$EXP_NAME \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir=$CKPT_DIR \
+    $@ \
+    2>&1 | tee $RUN_LOG
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'mean(data["train/token_mult_prob_error"]) < 1.1' \
+        'data["train/token_mult_prob_error"]["10"] < 1.1'
+fi
diff --git a/tests/test_suites/llm/sft-gpt-oss-20b-1n4g-fsdp4ep4-automodel.sh b/tests/test_suites/llm/sft-gpt-oss-20b-1n4g-fsdp4ep4-automodel.sh
new file mode 100755
index 0000000000..689a79883b
--- /dev/null
+++ b/tests/test_suites/llm/sft-gpt-oss-20b-1n4g-fsdp4ep4-automodel.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=1
+GPUS_PER_NODE=4
+STEPS_PER_RUN=50
+MAX_STEPS=50
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=30
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment
+cd $PROJECT_ROOT
+uv run examples/run_sft.py \
+    --config $CONFIG_PATH \
+    sft.max_num_steps=$MAX_STEPS \
+    logger.log_dir=$LOG_DIR \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name=$EXP_NAME \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir=$CKPT_DIR \
+    $@ \
+    2>&1 | tee $RUN_LOG
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'data["train/loss"]["1"] < 7.0' \
+        'data["train/loss"]["50"] < 0.4' \
+        'data["train/grad_norm"]["50"] < 17.5' \
+        'data["train/grad_norm"]["50"] > 10.0'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/sft-gpt-oss-20b-1n8g-fsdp8ep8-automodel.sh b/tests/test_suites/llm/sft-gpt-oss-20b-1n8g-fsdp8ep8-automodel.sh
index 8fb4d1b80b..2b37048106 100755
--- a/tests/test_suites/llm/sft-gpt-oss-20b-1n8g-fsdp8ep8-automodel.sh
+++ b/tests/test_suites/llm/sft-gpt-oss-20b-1n8g-fsdp8ep8-automodel.sh
@@ -19,7 +19,7 @@ uv run examples/run_sft.py \
     sft.max_num_steps=$MAX_STEPS \
     logger.log_dir=$LOG_DIR \
     logger.wandb_enabled=True \
-    logger.wandb.project=ruit_personal_debug \
+    logger.wandb.project=nemo-rl \
     logger.wandb.name=$EXP_NAME \
     logger.monitor_gpus=True \
     logger.tensorboard_enabled=True \
diff --git a/tests/test_suites/llm/sft-llama3.1-70b-8n4g-tp2pp2-long-megatron.sh b/tests/test_suites/llm/sft-llama3.1-70b-8n4g-tp2pp2-long-megatron.sh
new file mode 100755
index 0000000000..8e0b208e04
--- /dev/null
+++ b/tests/test_suites/llm/sft-llama3.1-70b-8n4g-tp2pp2-long-megatron.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=8
+GPUS_PER_NODE=4
+STEPS_PER_RUN=300
+MAX_STEPS=300
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=170
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment
+cd $PROJECT_ROOT
+uv run examples/run_sft.py \
+    --config $CONFIG_PATH \
+    sft.max_num_steps=$MAX_STEPS \
+    logger.log_dir=$LOG_DIR \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name=$EXP_NAME \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir=$CKPT_DIR \
+    $@ \
+    2>&1 | tee $RUN_LOG
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# TODO: the memory check is known to OOM. see https://github.com/NVIDIA-NeMo/RL/issues/263
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'data["train/loss"]["1"] < 0.55' \
+        'data["train/loss"]["300"] < 0.285' \
+        'max(data["ray/node.0.gpu.0.mem_gb"]) < 70' \
+        'mean(data["timing/train/total_step_time"], 2) < 20'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/sft-llama3.1-8b-1n4g-fsdp2tp1-long.sh b/tests/test_suites/llm/sft-llama3.1-8b-1n4g-fsdp2tp1-long.sh
new file mode 100755
index 0000000000..f2ffdf0985
--- /dev/null
+++ b/tests/test_suites/llm/sft-llama3.1-8b-1n4g-fsdp2tp1-long.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=1
+GPUS_PER_NODE=4
+STEPS_PER_RUN=250
+MAX_STEPS=250
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=130
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment
+cd $PROJECT_ROOT
+uv run examples/run_sft.py \
+    --config $CONFIG_PATH \
+    sft.max_num_steps=$MAX_STEPS \
+    logger.log_dir=$LOG_DIR \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name=$EXP_NAME \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir=$CKPT_DIR \
+    $@ \
+    2>&1 | tee $RUN_LOG
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# TODO: the memory check is known to OOM. see https://github.com/NVIDIA-NeMo/RL/issues/263
+# Run the metric checks only if the highest logged train/loss step has reached MAX_STEPS
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'data["train/loss"]["1"] < 0.6' \
+        'data["train/loss"]["250"] < 0.36' \
+        'max(data["ray/node.0.gpu.0.mem_gb"]) < 80' \
+        'mean(data["timing/train/total_step_time"], 2) < 22'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/sft-llama3.2-1b-1n4g-fsdp2tp1.v3.sh b/tests/test_suites/llm/sft-llama3.2-1b-1n4g-fsdp2tp1.v3.sh
new file mode 100755
index 0000000000..61bb596e4e
--- /dev/null
+++ b/tests/test_suites/llm/sft-llama3.2-1b-1n4g-fsdp2tp1.v3.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=1
+GPUS_PER_NODE=4
+STEPS_PER_RUN=250
+MAX_STEPS=250
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=25
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment
+cd $PROJECT_ROOT
+uv run examples/run_sft.py \
+    --config $CONFIG_PATH \
+    sft.max_num_steps=$MAX_STEPS \
+    logger.log_dir=$LOG_DIR \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name=$EXP_NAME \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir=$CKPT_DIR \
+    $@ \
+    2>&1 | tee $RUN_LOG
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'data["train/loss"]["1"] < 0.82' \
+        'mean(data["train/loss"],-10,-1) < 0.58' \
+        'max(data["ray/node.0.gpu.0.mem_gb"]) < 25' \
+        'mean(data["timing/train/total_step_time"], -6, -1) < 0.7'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/sft-nanov3-30BA3B-2n4g-fsdp2-lora.sh b/tests/test_suites/llm/sft-nanov3-30BA3B-2n4g-fsdp2-lora.sh
new file mode 100755
index 0000000000..fa0f5b5292
--- /dev/null
+++ b/tests/test_suites/llm/sft-nanov3-30BA3B-2n4g-fsdp2-lora.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=2
+GPUS_PER_NODE=4
+STEPS_PER_RUN=20  # step_time ~ 10sec
+MAX_STEPS=20
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=60 # Usually 15 minutes is enough for 20 steps, but we add a buffer of 3 minutes in metrics check
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment
+cd $PROJECT_ROOT
+uv run examples/run_sft.py \
+    --config $CONFIG_PATH \
+    sft.max_num_steps=$MAX_STEPS \
+    logger.log_dir=$LOG_DIR \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name=$EXP_NAME \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.checkpoint_dir=$CKPT_DIR \
+    $@ \
+    2>&1 | tee $RUN_LOG
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'data["train/loss"]["20"] < 2.05' \
+        'mean(data["timing/train/total_step_time"], 2) < 18'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/sft-nanov3-30BA3B-2n4g-fsdp2.sh b/tests/test_suites/llm/sft-nanov3-30BA3B-2n4g-fsdp2.sh
new file mode 100755
index 0000000000..60182d05ee
--- /dev/null
+++ b/tests/test_suites/llm/sft-nanov3-30BA3B-2n4g-fsdp2.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=2
+GPUS_PER_NODE=4
+STEPS_PER_RUN=20  # step_time ~ 15sec
+MAX_STEPS=20
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=60
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment
+cd $PROJECT_ROOT
+uv run examples/run_sft.py \
+    --config $CONFIG_PATH \
+    sft.max_num_steps=$MAX_STEPS \
+    logger.log_dir=$LOG_DIR \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name=$EXP_NAME \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.checkpoint_dir=$CKPT_DIR \
+    $@ \
+    2>&1 | tee $RUN_LOG
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'data["train/loss"]["20"] < 2.05' \
+        'mean(data["timing/train/total_step_time"], 2) < 15'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/llm/sft-qwen2.5-math7b-2n4g-megatron.sh b/tests/test_suites/llm/sft-qwen2.5-math7b-2n4g-megatron.sh
new file mode 100755
index 0000000000..9f6ded9760
--- /dev/null
+++ b/tests/test_suites/llm/sft-qwen2.5-math7b-2n4g-megatron.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# TODO: this config can crash on OOM
+# https://github.com/NVIDIA-NeMo/RL/issues/263
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=2
+GPUS_PER_NODE=4
+STEPS_PER_RUN=80  # step_time ~ 29sec
+MAX_STEPS=80
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=30
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment
+cd $PROJECT_ROOT
+uv run examples/run_sft.py \
+    --config $CONFIG_PATH \
+    sft.max_num_steps=$MAX_STEPS \
+    logger.log_dir=$LOG_DIR \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name=$EXP_NAME \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir=$CKPT_DIR \
+    ~policy.tokenizer.chat_template \
+    $@ \
+    2>&1 | tee $RUN_LOG
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'data["train/loss"]["80"] < 0.301' \
+        'data["validation/val_loss"]["80"] < 0.304'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/nightly.txt b/tests/test_suites/nightly.txt
index 4c93e4fcb9..9e37d7ed01 100644
--- a/tests/test_suites/nightly.txt
+++ b/tests/test_suites/nightly.txt
@@ -7,6 +7,10 @@ tests/test_suites/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.sh
 tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.sh
 tests/test_suites/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.sh
 
+# SGLang backend
+tests/test_suites/llm/grpo-qwen3-0.6b-1n8g-sglang.sh
+tests/test_suites/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1-sglang.sh
+
 # Dtensor (Qwen/Qwen2.5-7B-Instruct)
 tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4.v3.sh
 
diff --git a/tests/test_suites/nightly_gb200.txt b/tests/test_suites/nightly_gb200.txt
new file mode 100644
index 0000000000..1c4cd296bc
--- /dev/null
+++ b/tests/test_suites/nightly_gb200.txt
@@ -0,0 +1,73 @@
+########
+# GRPO #
+########
+
+# Short 1N/1B runs (go past 200 steps - usually divergence happens by now) -- going to 4 nodes doesn't help that much
+tests/test_suites/llm/grpo-qwen2.5-math-1.5b-instruct-1n4g-fsdp2tp1.v3.sh
+tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n4g-fsdp2tp1.v3.sh
+tests/test_suites/llm/grpo-gemma3-1b-it-1n4g-fsdp2tp1.sh
+
+# Dtensor (Qwen/Qwen2.5-7B-Instruct)
+tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n4g-fsdp2tp2.v3.sh
+
+# Megatron
+tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n4g-megatron.sh
+tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n4g-megatron_generation.sh
+
+# Functional moonlight run
+tests/test_suites/llm/grpo-moonlight-16ba3b-4n4g-megatron.sh
+
+# Functional VLM run
+tests/test_suites/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n4g-dtensor2tp1.v1.sh
+tests/test_suites/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n4g-megatrontp1.v1.sh
+
+# Deepscaler (short tests)
+tests/test_suites/llm/grpo-deepscaler-1.5b-1n4g-8K.sh
+
+# Non-colocated
+tests/test_suites/llm/grpo-llama3.1-8b-instruct-2n4g-fsdp2tp1-noncolocated.sh
+
+# Nano-v2
+tests/test_suites/llm/grpo-nano-v2-12b-1n4g-megatron.sh
+tests/test_suites/llm/grpo-nano-v2-12b-2n4g-fsdp2tp1.sh
+
+#######
+# SFT #
+#######
+
+# 1N 1B/8B runs
+tests/test_suites/llm/sft-llama3.2-1b-1n4g-fsdp2tp1.v3.sh
+
+# Megatron
+# validate TP/DP
+tests/test_suites/llm/sft-qwen2.5-math7b-2n4g-megatron.sh
+
+# gpt-oss 20b DeepEP test
+tests/test_suites/llm/sft-gpt-oss-20b-1n4g-fsdp4ep4-automodel.sh
+
+# Nemotron 3 Nano 30B A3B Base BF16 tests
+tests/test_suites/llm/sft-nanov3-30BA3B-2n4g-fsdp2.sh
+tests/test_suites/llm/sft-nanov3-30BA3B-2n4g-fsdp2-lora.sh
+
+#######
+# DPO #
+#######
+
+# 1N dtensor
+tests/test_suites/llm/dpo-llama3.2-1b-instruct-1n4g-fsdp2tp1.v2.sh
+
+# Short dtensor
+tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n4g-fsdp2tp1-quick.v2.sh
+
+# Short megatron
+tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n4g-megatrontp1pp2-quick.sh
+
+################
+# Distillation #
+################
+
+# Distillation tests
+tests/test_suites/llm/distillation-qwen3-32b-to-1.7b-base-1n4g-fsdp2tp1.v1.sh
+
+# Short megatron
+tests/test_suites/llm/distillation-qwen3-32b-to-1.7b-base-1n4g-megatron-tp1pp2cp2-pack.sh
diff --git a/tests/test_suites/performance_h100.txt b/tests/test_suites/performance_h100.txt
index 54a75dc86e..ee8fbf7c28 100644
--- a/tests/test_suites/performance_h100.txt
+++ b/tests/test_suites/performance_h100.txt
@@ -7,6 +7,7 @@
 ## SYNC
 tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n8g.sh
 tests/test_suites/llm/performance/grpo-qwen3-30ba3b-4n8g.sh
+tests/test_suites/llm/performance/grpo-qwen3-30ba3b-4n8g-40K.sh
 tests/test_suites/llm/performance/grpo-deepseek-v3-32n8g.sh
 tests/test_suites/llm/performance/grpo-qwen3-32b-4n8g.sh
 tests/test_suites/llm/performance/grpo-qwen3-235b-16n8g.sh
diff --git a/tests/test_suites/release_gb200.txt b/tests/test_suites/release_gb200.txt
new file mode 100644
index 0000000000..59cee6f24c
--- /dev/null
+++ b/tests/test_suites/release_gb200.txt
@@ -0,0 +1,44 @@
+########
+# GRPO #
+########
+
+# Megatron (Qwen/Qwen2.5-7B-Instruct)
+tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n4g-megatron.sh
+
+# Long 8b run
+tests/test_suites/llm/grpo-llama3.1-8b-instruct-4n4g-fsdp2tp1-long.v3.sh
+
+# Long 32b run
+tests/test_suites/llm/grpo-qwen2.5-32b-32n4g-fsdp2tp4-actckpt-long.v3.sh
+
+# Long Gemma3 27b run
+tests/test_suites/llm/grpo-gemma3-27b-it-8n4g-fsdp2tp4-actckpt-long.sh
+
+# Long Megatron Qwen3 30B-A3B run
+tests/test_suites/llm/grpo-qwen3-30ba3b-8n4g-megatron.sh
+
+# DAPO 4h run
+tests/test_suites/llm/dapo-qwen2.5-7b-16n4g-fsdp2cp2.sh
+
+# Deepseek-V3 on DAPO dataset
+tests/test_suites/llm/grpo-dapomath17k-dsv3-32n4g-megatron.sh
+
+# GPT-OSS
+tests/test_suites/llm/grpo-gptoss-20b-8n4g-megatron.sh
+
+#######
+# SFT #
+#######
+
+# Long 8b convergence
+tests/test_suites/llm/sft-llama3.1-8b-1n4g-fsdp2tp1-long.sh
+
+# 300 step 70b convergence
+tests/test_suites/llm/sft-llama3.1-70b-8n4g-tp2pp2-long-megatron.sh
+
+################
+# Distillation #
+################
+
+# Long 4b convergence
+tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n4g-fsdp2tp1-long.v1.sh
diff --git a/tests/test_suites/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n4g-dtensor2tp1.v1.sh b/tests/test_suites/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n4g-dtensor2tp1.v1.sh
new file mode 100755
index 0000000000..842bfc9fa5
--- /dev/null
+++ b/tests/test_suites/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n4g-dtensor2tp1.v1.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=1
+GPUS_PER_NODE=4
+STEPS_PER_RUN=200
+MAX_STEPS=200
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=120
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment
+cd $PROJECT_ROOT
+uv run examples/run_vlm_grpo.py \
+    --config $CONFIG_PATH \
+    grpo.max_num_steps=$MAX_STEPS \
+    logger.log_dir=$LOG_DIR \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name=$EXP_NAME \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir=$CKPT_DIR \
+    $@ \
+    2>&1 | tee $RUN_LOG
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'data["train/reward"]["200"] > 0.9'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/test_suites/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n4g-megatrontp1.v1.sh b/tests/test_suites/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n4g-megatrontp1.v1.sh
new file mode 100755
index 0000000000..7b15555457
--- /dev/null
+++ b/tests/test_suites/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n4g-megatrontp1.v1.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+source $SCRIPT_DIR/common.env
+
+# ===== BEGIN CONFIG =====
+NUM_NODES=1
+GPUS_PER_NODE=4
+STEPS_PER_RUN=200
+MAX_STEPS=200
+NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up
+NUM_MINUTES=120
+# ===== END CONFIG =====
+
+exit_if_max_steps_reached
+
+# Run the experiment
+cd $PROJECT_ROOT
+uv run examples/run_vlm_grpo.py \
+    --config $CONFIG_PATH \
+    grpo.max_num_steps=$MAX_STEPS \
+    logger.log_dir=$LOG_DIR \
+    logger.wandb_enabled=True \
+    logger.wandb.project=nemo-rl \
+    logger.wandb.name=$EXP_NAME \
+    logger.monitor_gpus=True \
+    logger.tensorboard_enabled=True \
+    checkpointing.enabled=True \
+    checkpointing.checkpoint_dir=$CKPT_DIR \
+    $@ \
+    2>&1 | tee $RUN_LOG
+
+# Convert tensorboard logs to json
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+# Only run metrics if the target step is reached
+if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then
+    uv run tests/check_metrics.py $JSON_METRICS \
+        'data["train/loss"]["200"] < 0.1' \
+        'data["train/reward"]["200"] > 0.9'
+
+    # Clean up checkpoint directory after successful run to save space.
+    rm -rf "$CKPT_DIR"
+fi
+
diff --git a/tests/unit/L0_Unit_Tests_Generation.sh b/tests/unit/L0_Unit_Tests_Generation.sh
index e7b7a6e2ca..d30e051c66 100644
--- a/tests/unit/L0_Unit_Tests_Generation.sh
+++ b/tests/unit/L0_Unit_Tests_Generation.sh
@@ -45,3 +45,11 @@ if [[ $exit_code -eq 5 ]]; then
 else
     uv run --extra vllm bash -x ./tests/run_unit.sh unit/models/generation/ --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --vllm-only
 fi
+
+# Check and run sglang tests: the collect-only dry run exits with 5 when pytest collects no tests
+exit_code=$(uv run --extra sglang pytest tests/unit/models/generation/ --collect-only --hf-gated --sglang-only -q >/dev/null 2>&1; echo $?)
+if [[ $exit_code -eq 5 ]]; then
+    echo "No sglang tests to run"
+else
+    uv run --extra sglang bash -x ./tests/run_unit.sh unit/models/generation/ --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --sglang-only
+fi
diff --git a/tests/unit/L0_Unit_Tests_Other.sh b/tests/unit/L0_Unit_Tests_Other.sh
index f60c09df03..ee42e7f66a 100644
--- a/tests/unit/L0_Unit_Tests_Other.sh
+++ b/tests/unit/L0_Unit_Tests_Other.sh
@@ -46,6 +46,14 @@ else
     uv run --extra vllm bash -x ./tests/run_unit.sh unit/ --ignore=unit/models/generation/ --ignore=unit/models/policy/ --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --vllm-only
 fi
 
+# Check and run sglang tests: the collect-only dry run exits with 5 when pytest collects no tests
+exit_code=$(uv run --extra sglang pytest tests/unit/ --ignore=tests/unit/models/generation/ --ignore=tests/unit/models/policy/ --collect-only --hf-gated --sglang-only -q >/dev/null 2>&1; echo $?)
+if [[ $exit_code -eq 5 ]]; then
+    echo "No sglang tests to run"
+else
+    uv run --extra sglang bash -x ./tests/run_unit.sh unit/ --ignore=unit/models/generation/ --ignore=unit/models/policy/ --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --sglang-only
+fi
+
 # Research unit tests
 for i in research/*/tests/unit; do
     project_dir=$(dirname $(dirname $i))
diff --git a/tests/unit/L0_Unit_Tests_Policy.sh b/tests/unit/L0_Unit_Tests_Policy.sh
index bae1178c72..ffc50512ea 100644
--- a/tests/unit/L0_Unit_Tests_Policy.sh
+++ b/tests/unit/L0_Unit_Tests_Policy.sh
@@ -45,3 +45,11 @@ if [[ $exit_code -eq 5 ]]; then
 else
     uv run --extra vllm bash -x ./tests/run_unit.sh unit/models/policy/ --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --vllm-only
 fi
+
+# Check and run sglang tests: the collect-only dry run exits with 5 when pytest collects no tests
+exit_code=$(uv run --extra sglang pytest tests/unit/models/policy/ --collect-only --hf-gated --sglang-only -q >/dev/null 2>&1; echo $?)
+if [[ $exit_code -eq 5 ]]; then
+    echo "No sglang tests to run"
+else
+    uv run --extra sglang bash -x ./tests/run_unit.sh unit/models/policy/ --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --sglang-only
+fi
diff --git a/tests/unit/algorithms/test_grpo.py b/tests/unit/algorithms/test_grpo.py
index dcd1303afe..c19d3153dc 100644
--- a/tests/unit/algorithms/test_grpo.py
+++ b/tests/unit/algorithms/test_grpo.py
@@ -25,6 +25,7 @@
     dynamic_sampling,
     grpo_train,
     normalize_advantages_with_epsilon,
+    validate,
 )
 from nemo_rl.algorithms.loss_functions import ClippedPGLossFn
 from nemo_rl.data.interfaces import DatumSpec, LLMMessageLogType
@@ -812,6 +813,321 @@ def test_noncolocated_inference_requires_explicit_gpus_per_node_multi_node():
         setup(master_config, tokenizer, dataset, None)
 
 
+@pytest.mark.parametrize(
+    "colocated_inference, expected_parallel",
+    [(True, 0.0), (False, True)],  # (colocated?, expected logged parallel_init_enabled)
+)
+def test_setup_sglang_sets_model_path_and_parallel_flag(
+    monkeypatch, colocated_inference, expected_parallel
+):
+    from nemo_rl.algorithms import grpo as grpo_mod
+
+    logged = {}  # filled by DummyLogger.log_metrics
+
+    class DummyLogger:
+        def log_hyperparams(self, *_args, **_kwargs):
+            pass
+
+        def log_metrics(self, metrics, *_args, **_kwargs):
+            logged["metrics"] = metrics
+
+    class DummyCheckpointer:
+        def get_latest_checkpoint_path(self):
+            return None
+
+        def load_training_info(self, _path):
+            return None
+
+    class DummyLoader:
+        def __init__(self, *_args, **_kwargs):
+            pass
+
+        def __len__(self):
+            return 1
+
+        def load_state_dict(self, _state):
+            pass
+
+    class DummyCluster:
+        def __init__(self, *_args, **_kwargs):
+            pass
+
+        def world_size(self):
+            return 1
+
+        def get_master_address_and_port(self):
+            return "127.0.0.1", 1234
+
+    class DummyPolicy:
+        def print_node_ip_and_gpu_id(self):
+            pass
+
+        def init_collective(self, *_args, **_kwargs):
+            return []
+
+        def prepare_refit_info(self):
+            return {}
+
+    class DummySGLangGeneration:
+        def finish_generation(self):
+            pass
+
+        def prepare_refit_info(self, _state):
+            pass
+
+        def init_collective(self, *_args, **_kwargs):
+            return []
+
+    monkeypatch.setattr(grpo_mod, "Logger", lambda *_args, **_kwargs: DummyLogger())
+    monkeypatch.setattr(
+        grpo_mod, "CheckpointManager", lambda *_args, **_kwargs: DummyCheckpointer()
+    )
+    monkeypatch.setattr(
+        grpo_mod, "ClippedPGLossFn", lambda *_args, **_kwargs: MagicMock()
+    )
+    monkeypatch.setattr(grpo_mod, "StatefulDataLoader", DummyLoader)
+    monkeypatch.setattr(grpo_mod, "RayVirtualCluster", DummyCluster)
+    monkeypatch.setattr(grpo_mod, "Policy", lambda *_args, **_kwargs: DummyPolicy())
+    monkeypatch.setattr(
+        grpo_mod,
+        "SGLangGeneration",
+        lambda *_args, **_kwargs: DummySGLangGeneration(),
+    )
+    monkeypatch.setattr(grpo_mod.ray, "get", lambda x: x)  # ray.get becomes a pass-through
+
+    generation_resources = {
+        "gpus_per_node": 1,
+        "num_nodes": 1,
+    }
+    if colocated_inference:  # colocated case: no dedicated generation resources
+        generation_resources = {"gpus_per_node": None, "num_nodes": None}
+
+    master_config = {
+        "policy": {
+            "model_name": "fake-model",
+            "train_global_batch_size": 1,
+            "train_micro_batch_size": 1,
+            "max_total_sequence_length": 8,
+            "make_sequence_length_divisible_by": 1,
+            "dtensor_cfg": {"enabled": False},
+            "megatron_cfg": {"enabled": False, "pipeline_model_parallel_size": 1},
+            "generation": {
+                "backend": "sglang",
+                "colocated": {
+                    "enabled": colocated_inference,
+                    "resources": generation_resources,
+                },
+                "sglang_cfg": {
+                    "gpus_per_server": 1,
+                    "dp_size": 1,
+                    "pp_size": 1,
+                    "ep_size": 1,
+                },
+            },
+        },
+        "loss_fn": {
+            "force_on_policy_ratio": False,
+            "use_importance_sampling_correction": False,
+        },
+        "env": {},
+        "grpo": {
+            "seed": 1,
+            "num_prompts_per_step": 1,
+            "num_generations_per_prompt": 1,
+            "max_num_steps": 1,
+            "max_num_epochs": 1,
+            "val_period": 0,
+            "val_batch_size": 1,
+            "val_at_start": False,
+            "max_val_samples": 1,
+            "use_dynamic_sampling": False,
+            "batch_multiplier": 1,
+            "normalize_rewards": False,
+            "use_leave_one_out_baseline": False,
+            "reward_scaling": {"enabled": False},
+            "reward_shaping": {"enabled": False},
+            "overlong_filtering": False,
+        },
+        "data": {"shuffle": False, "num_workers": 0, "env_name": None},
+        "logger": {"num_val_samples_to_print": 0},
+        "checkpointing": {"enabled": False},
+        "cluster": {"num_nodes": 1, "gpus_per_node": 4},
+    }
+
+    tokenizer = MagicMock()
+    dataset = MagicMock()
+    dataset.__len__ = MagicMock(return_value=1)
+
+    grpo_mod.setup(master_config, tokenizer, dataset, None)  # expected to add sglang_cfg["model_path"]
+
+    assert (
+        master_config["policy"]["generation"]["sglang_cfg"]["model_path"]
+        == master_config["policy"]["model_name"]
+    )
+    assert logged["metrics"]["parallel_init_enabled"] == expected_parallel
+
+
+def test_refit_policy_generation_sglang_colocated_http(monkeypatch):
+    from nemo_rl.algorithms import grpo as grpo_mod
+
+    calls = {  # call recorder written to by the dummy classes below
+        "prepare_for_generation_tags": [],
+        "invalidate_kv_cache": 0,
+        "stream_weights_via_http": [],
+        "offload_before_refit": 0,
+        "offload_after_refit": 0,
+    }
+
+    class DummySGLangGeneration:
+        def prepare_for_generation(self, tags=None):
+            calls["prepare_for_generation_tags"].append(tags)
+
+        def get_sglang_url_to_gpu_uuids(self):
+            return {"http://localhost:12345": ["gpu-uuid-0"]}
+
+        def invalidate_kv_cache(self):
+            calls["invalidate_kv_cache"] += 1
+            return True
+
+    class DummyPolicy:
+        def offload_before_refit(self):
+            calls["offload_before_refit"] += 1
+
+        def offload_after_refit(self):
+            calls["offload_after_refit"] += 1
+
+        def get_free_memory_bytes(self):
+            return 1024 * 1024 * 1024
+
+        def stream_weights_via_http(self, sglang_url_to_gpu_uuids):
+            calls["stream_weights_via_http"].append(sglang_url_to_gpu_uuids)
+            return ["ok"]
+
+    monkeypatch.setattr(grpo_mod, "SGLangGeneration", DummySGLangGeneration)
+    monkeypatch.setattr(grpo_mod.ray, "get", lambda x: x)  # ray.get becomes a pass-through
+
+    grpo_mod.refit_policy_generation(
+        policy=DummyPolicy(),
+        policy_generation=DummySGLangGeneration(),
+        colocated_inference=True,
+    )
+
+    assert calls["offload_before_refit"] == 1
+    assert calls["offload_after_refit"] == 1
+    assert calls["invalidate_kv_cache"] == 1
+    assert calls["stream_weights_via_http"] == [
+        {"http://localhost:12345": ["gpu-uuid-0"]}
+    ]
+    assert calls["prepare_for_generation_tags"] == [["weights"], ["kv_cache"]]
+
+
+def test_refit_policy_generation_sglang_non_colocated_raises(monkeypatch):
+    from nemo_rl.algorithms import grpo as grpo_mod
+
+    class DummySGLangGeneration:
+        pass  # no methods: the call below must raise before using the generation object
+
+    monkeypatch.setattr(grpo_mod, "SGLangGeneration", DummySGLangGeneration)
+
+    with pytest.raises(NotImplementedError):
+        grpo_mod.refit_policy_generation(
+            policy=object(),
+            policy_generation=DummySGLangGeneration(),
+            colocated_inference=False,
+        )
+
+
+def test_grpo_train_collects_generation_logger_metrics(
+    monkeypatch, mock_grpo_components
+):
+    from nemo_rl.algorithms import grpo as grpo_mod
+
+    policy_generation = MagicMock()
+    policy_generation.clear_logger_metrics = MagicMock()
+    policy_generation.get_logger_metrics = MagicMock(
+        return_value={"pending_requests": 1}
+    )
+    policy_generation.prepare_for_generation = MagicMock()
+    policy_generation.finish_generation = MagicMock()
+
+    mock_batch = next(iter(mock_grpo_components["train_dataloader"]))
+    mock_rollout_metrics = {"gen_kl_error": 0.0, "mean_gen_tokens_per_sample": 2.0}
+
+    def fake_batched_message_log_to_flat_message(*_args, **_kwargs):
+        flat = BatchedDataDict(
+            {
+                "token_ids": torch.tensor([[1, 2]]),
+                "advantages": torch.tensor([[0.5, 0.5]]),
+                "generation_logprobs": torch.tensor([[0.0, 0.0]]),
+                "token_loss_mask": torch.tensor([[1, 1]]),
+                "content": ["ok"],
+            }
+        )
+        return flat, torch.tensor([2])
+
+    monkeypatch.setattr(
+        grpo_mod,
+        "batched_message_log_to_flat_message",
+        fake_batched_message_log_to_flat_message,
+    )
+    monkeypatch.setattr(  # make the async-rollout predicate always return True
+        grpo_mod, "_should_use_async_rollouts", lambda *_args, **_kwargs: True
+    )
+    monkeypatch.setattr(
+        grpo_mod,
+        "run_async_multi_turn_rollout",
+        lambda *_args, **_kwargs: (mock_batch, mock_rollout_metrics),
+    )
+    monkeypatch.setattr(
+        grpo_mod,
+        "run_multi_turn_rollout",
+        lambda *_args, **_kwargs: (mock_batch, mock_rollout_metrics),
+    )
+    monkeypatch.setattr(
+        grpo_mod,
+        "calculate_baseline_and_std_per_prompt",
+        lambda *_args, **_kwargs: (torch.tensor([0.1]), torch.tensor([1.0])),
+    )
+    monkeypatch.setattr(
+        grpo_mod, "refit_policy_generation", lambda *_args, **_kwargs: None
+    )
+    monkeypatch.setattr(
+        grpo_mod, "print_performance_metrics", lambda *_args, **_kwargs: {}
+    )
+    monkeypatch.setattr(
+        grpo_mod, "maybe_gpu_profile_step", lambda *_args, **_kwargs: None
+    )
+
+    master_config = mock_grpo_components["master_config"]
+    master_config["grpo"]["max_num_steps"] = 1
+    master_config["grpo"]["max_num_epochs"] = 1
+    master_config["grpo"]["val_period"] = 0
+    master_config["grpo"]["val_at_start"] = False
+    master_config["grpo"]["use_dynamic_sampling"] = False
+
+    grpo_mod.grpo_train(
+        mock_grpo_components["policy"],
+        policy_generation,
+        mock_grpo_components["train_dataloader"],
+        mock_grpo_components["val_dataloader"],
+        mock_grpo_components["tokenizer"],
+        mock_grpo_components["loss_fn"],
+        mock_grpo_components["task_to_env"],
+        mock_grpo_components["val_task_to_env"],
+        mock_grpo_components["logger"],
+        mock_grpo_components["checkpointer"],
+        _default_grpo_save_state(),
+        master_config,
+    )
+
+    assert policy_generation.clear_logger_metrics.called
+    assert policy_generation.get_logger_metrics.called
+    assert any(  # at least one log_metrics payload has a "generation_logger_metrics" entry
+        "generation_logger_metrics" in call.args[0]
+        for call in mock_grpo_components["logger"].log_metrics.call_args_list
+    )
+
+
 @pytest.fixture
 def mock_grpo_components():
     # Create mock components
@@ -1347,3 +1663,238 @@ def test_normalize_advantages_with_small_nonzero_std():
 
     # All should be normalized since std > 0
     assert torch.allclose(result, expected, rtol=1e-5)
+
+
+# ============================================================================
+# Tests for validate function
+# ============================================================================
+
+
+class TestValidateFunction:
+    """Tests for the validate() function."""
+
+    def test_validate_logs_data_when_logger_provided(self, tmp_path):
+        """Test that validation data is logged to JSONL when logger is provided."""
+
+        # Create mock components
+        mock_policy_gen = MagicMock()
+        mock_tokenizer = MagicMock()
+        mock_tokenizer.pad_token_id = 0
+
+        # Create mock batch with two samples (one user + one assistant message each)
+        mock_batch = BatchedDataDict[DatumSpec](
+            {
+                "message_log": [
+                    [
+                        {
+                            "role": "user",
+                            "content": "test1",
+                            "token_ids": torch.tensor([1, 2, 3]),
+                        },
+                        {
+                            "role": "assistant",
+                            "content": "response1",
+                            "token_ids": torch.tensor([4, 5, 6]),
+                        },
+                    ],
+                    [
+                        {
+                            "role": "user",
+                            "content": "test2",
+                            "token_ids": torch.tensor([7, 8, 9]),
+                        },
+                        {
+                            "role": "assistant",
+                            "content": "response2",
+                            "token_ids": torch.tensor([10, 11, 12]),
+                        },
+                    ],
+                ],
+                "task_name": ["math", "math"],
+                "extra_env_info": [{}, {}],
+                "loss_multiplier": torch.tensor([1.0, 1.0]),
+                "idx": torch.tensor([0, 1]),
+                "length": torch.tensor([6, 6]),
+                "total_reward": torch.tensor([1.0, 0.5]),
+            }
+        )
+
+        # Create mock dataloader that yields mock_batch
+        mock_dataloader = MagicMock(spec=StatefulDataLoader)
+        mock_dataloader.__iter__ = MagicMock(return_value=iter([mock_batch]))
+
+        # Create mock environment
+        mock_env = MagicMock(spec=EnvironmentInterface)
+        mock_env.global_post_process_and_metrics.return_value = (mock_batch, {})
+
+        # Create mock logger that captures calls
+        mock_logger = MagicMock()
+        logged_data = {}
+
+        def capture_log(data, filename):
+            logged_data["data"] = data
+            logged_data["filename"] = filename
+
+        mock_logger.log_batched_dict_as_jsonl = MagicMock(side_effect=capture_log)
+
+        # Mock config: vllm backend, colocated, async_engine disabled
+        mock_config = {
+            "grpo": {
+                "max_val_samples": 10,
+                "val_batch_size": 2,
+                "max_rollout_turns": 1,
+            },
+            "policy": {
+                "max_total_sequence_length": 2048,
+                "generation": {
+                    "backend": "vllm",
+                    "colocated": {"enabled": True},
+                    "vllm_cfg": {"async_engine": False},
+                },
+            },
+            "logger": {
+                "num_val_samples_to_print": 2,
+            },
+        }
+
+        mock_rollout_metrics = {"mean_gen_tokens_per_sample": 10.0}
+
+        with patch("nemo_rl.algorithms.grpo.run_multi_turn_rollout") as mock_rollout:
+            mock_rollout.return_value = (mock_batch, mock_rollout_metrics)
+            with patch(
+                "nemo_rl.algorithms.grpo._should_use_nemo_gym", return_value=False
+            ):
+                with patch(
+                    "nemo_rl.algorithms.grpo._should_use_async_rollouts",
+                    return_value=False,
+                ):
+                    with patch("nemo_rl.algorithms.grpo.print_message_log_samples"):
+                        val_metrics, timing = validate(
+                            mock_policy_gen,
+                            mock_dataloader,
+                            mock_tokenizer,
+                            {"math": mock_env},
+                            step=5,
+                            master_config=mock_config,
+                            logger=mock_logger,
+                        )
+
+        # Verify log_batched_dict_as_jsonl was called
+        mock_logger.log_batched_dict_as_jsonl.assert_called_once()
+
+        # Verify the filename encodes the step passed to validate() (step=5)
+        assert logged_data["filename"] == "val_data_step5.jsonl"
+
+        # Verify the data structure
+        assert "content" in logged_data["data"]
+        assert "rewards" in logged_data["data"]
+
+    def test_validate_works_without_logger(self):
+        """Test that validation works when logger is None (backward compat)."""
+        # Create mock components
+        mock_policy_gen = MagicMock()
+        mock_tokenizer = MagicMock()
+        mock_tokenizer.pad_token_id = 0
+
+        # Create mock batch with a single sample (one user + one assistant message)
+        mock_batch = BatchedDataDict[DatumSpec](
+            {
+                "message_log": [
+                    [
+                        {
+                            "role": "user",
+                            "content": "test1",
+                            "token_ids": torch.tensor([1, 2, 3]),
+                        },
+                        {
+                            "role": "assistant",
+                            "content": "response1",
+                            "token_ids": torch.tensor([4, 5, 6]),
+                        },
+                    ],
+                ],
+                "task_name": ["math"],
+                "extra_env_info": [{}],
+                "loss_multiplier": torch.tensor([1.0]),
+                "idx": torch.tensor([0]),
+                "length": torch.tensor([6]),
+                "total_reward": torch.tensor([1.0]),
+            }
+        )
+
+        # Create mock dataloader that yields mock_batch
+        mock_dataloader = MagicMock(spec=StatefulDataLoader)
+        mock_dataloader.__iter__ = MagicMock(return_value=iter([mock_batch]))
+
+        # Create mock environment
+        mock_env = MagicMock(spec=EnvironmentInterface)
+        mock_env.global_post_process_and_metrics.return_value = (mock_batch, {})
+
+        # Mock config: vllm backend, colocated, async_engine disabled
+        mock_config = {
+            "grpo": {
+                "max_val_samples": 10,
+                "val_batch_size": 1,
+                "max_rollout_turns": 1,
+            },
+            "policy": {
+                "max_total_sequence_length": 2048,
+                "generation": {
+                    "backend": "vllm",
+                    "colocated": {"enabled": True},
+                    "vllm_cfg": {"async_engine": False},
+                },
+            },
+            "logger": {
+                "num_val_samples_to_print": 1,
+            },
+        }
+
+        mock_rollout_metrics = {"mean_gen_tokens_per_sample": 10.0}
+
+        with patch("nemo_rl.algorithms.grpo.run_multi_turn_rollout") as mock_rollout:
+            mock_rollout.return_value = (mock_batch, mock_rollout_metrics)
+            with patch(
+                "nemo_rl.algorithms.grpo._should_use_nemo_gym", return_value=False
+            ):
+                with patch(
+                    "nemo_rl.algorithms.grpo._should_use_async_rollouts",
+                    return_value=False,
+                ):
+                    with patch("nemo_rl.algorithms.grpo.print_message_log_samples"):
+                        # Call validate without logger (should not raise exception)
+                        val_metrics, timing = validate(
+                            mock_policy_gen,
+                            mock_dataloader,
+                            mock_tokenizer,
+                            {"math": mock_env},
+                            step=5,
+                            master_config=mock_config,
+                            logger=None,
+                        )
+
+        # Verify the returned metrics contain the expected keys
+        assert "accuracy" in val_metrics
+        assert "avg_length" in val_metrics
+
+    def test_validate_returns_empty_when_no_dataloader(self):
+        """Test that validate returns empty dicts when no dataloader is provided."""
+        mock_policy_gen = MagicMock()
+        mock_tokenizer = MagicMock()
+
+        mock_config = {
+            "dpo": {"val_period": 0},  # Required for the assertion (NOTE(review): "dpo", not "grpo" - confirm)
+        }
+
+        val_metrics, timing = validate(
+            mock_policy_gen,
+            None,  # No dataloader
+            mock_tokenizer,
+            None,
+            step=0,
+            master_config=mock_config,
+            logger=None,
+        )
+
+        assert val_metrics == {}
+        assert timing == {}
diff --git a/tests/unit/algorithms/test_sft.py b/tests/unit/algorithms/test_sft.py
index 83d0cf20c4..fffd06187d 100644
--- a/tests/unit/algorithms/test_sft.py
+++ b/tests/unit/algorithms/test_sft.py
@@ -61,7 +61,6 @@ def val_iter(self):
     loss_fn = NLLLoss()
     logger = MagicMock()
     checkpointer = MagicMock()
-    sft_task_spec = MagicMock()
 
     # Create mock master config
     master_config = {
@@ -97,7 +96,6 @@ def val_iter(self):
         "loss_fn": loss_fn,
         "logger": logger,
         "checkpointer": checkpointer,
-        "sft_task_spec": sft_task_spec,
         "master_config": master_config,
     }
 
@@ -118,7 +116,6 @@ def test_exit_on_max_steps(mock_components):
         mock_components["loss_fn"],
         mock_components["master_config"],
         mock_components["logger"],
-        mock_components["sft_task_spec"],
         mock_components["checkpointer"],
         sft_save_state,
     )
@@ -144,7 +141,6 @@ def test_exit_on_max_epochs(mock_components):
         mock_components["loss_fn"],
         mock_components["master_config"],
         mock_components["logger"],
-        mock_components["sft_task_spec"],
         mock_components["checkpointer"],
         sft_save_state,
     )
@@ -178,7 +174,6 @@ def test_exit_on_timeout(mock_components, capsys):
             mock_components["loss_fn"],
             mock_components["master_config"],
             mock_components["logger"],
-            mock_components["sft_task_spec"],
             mock_components["checkpointer"],
             sft_save_state,
         )
@@ -223,7 +218,6 @@ def test_training_with_disabled_validation(mock_components):
         mock_components["loss_fn"],
         mock_components["master_config"],
         mock_components["logger"],
-        mock_components["sft_task_spec"],
         mock_components["checkpointer"],
         sft_save_state,
     )
@@ -247,7 +241,6 @@ def test_training_with_negative_val_period(mock_components):
         mock_components["loss_fn"],
         mock_components["master_config"],
         mock_components["logger"],
-        mock_components["sft_task_spec"],
         mock_components["checkpointer"],
         sft_save_state,
     )
diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py
index ab3368185c..ebc5569f86 100644
--- a/tests/unit/conftest.py
+++ b/tests/unit/conftest.py
@@ -57,6 +57,12 @@ def pytest_addoption(parser):
         default=False,
         help="Run ONLY vllm tests",
     )
+    parser.addoption(
+        "--sglang-only",
+        action="store_true",
+        default=False,
+        help="Run ONLY sglang tests",
+    )
 
 
 def pytest_collection_modifyitems(config, items):
@@ -65,12 +71,18 @@ def pytest_collection_modifyitems(config, items):
     run_mcore_only = config.getoption("--mcore-only")
     run_automodel_only = config.getoption("--automodel-only")
     run_vllm_only = config.getoption("--vllm-only")
+    run_sglang_only = config.getoption("--sglang-only")
 
     # Check for mutually exclusive options
-    exclusive_options = [run_mcore_only, run_automodel_only, run_vllm_only]
+    exclusive_options = [
+        run_mcore_only,
+        run_automodel_only,
+        run_vllm_only,
+        run_sglang_only,
+    ]
     if sum(exclusive_options) > 1:
         raise ValueError(
-            "--mcore-only, --automodel-only, and --vllm-only are mutually exclusive"
+            "--mcore-only, --automodel-only, --vllm-only, and --sglang-only are mutually exclusive"
         )
 
     marker_expr = config.getoption("-m", default="")
@@ -140,6 +152,24 @@ def pytest_collection_modifyitems(config, items):
         # Exclude vllm tests by default
         new_items = [item for item in new_items if not item.get_closest_marker("vllm")]
 
+    # Filter by sglang marker
+    if run_sglang_only:
+        # Validate that sglang is available
+        try:
+            import sglang  # noqa: F401
+        except ImportError:
+            raise ImportError(
+                "Cannot run sglang tests: sglang is not available.\n"
+                "Please run tests with: uv run --extra sglang --group test pytest ..."
+            )
+        # Include only sglang tests
+        new_items = [item for item in new_items if item.get_closest_marker("sglang")]
+    else:
+        # Exclude sglang tests by default
+        new_items = [
+            item for item in new_items if not item.get_closest_marker("sglang")
+        ]
+
     # Ensure run_first tests are prioritized
     new_items.sort(key=lambda item: 0 if item.get_closest_marker("run_first") else 1)
 
diff --git a/tests/unit/data/datasets/test_oai_format_dataset.py b/tests/unit/data/datasets/test_oai_format_dataset.py
index aad989ed15..ef7b000c59 100644
--- a/tests/unit/data/datasets/test_oai_format_dataset.py
+++ b/tests/unit/data/datasets/test_oai_format_dataset.py
@@ -16,9 +16,10 @@
 import tempfile
 
 import pytest
-from transformers import AutoTokenizer
 
+from nemo_rl.algorithms.utils import get_tokenizer
 from nemo_rl.data.chat_templates import COMMON_CHAT_TEMPLATES
+from nemo_rl.data.datasets import load_response_dataset
 from nemo_rl.data.datasets.response_datasets import OpenAIFormatDataset
 
 
@@ -27,74 +28,73 @@ def sample_data(request):
     chat_key = request.param[0]
     system_key = request.param[1]
 
-    train_data = {
+    data = {
         chat_key: [
             {"role": "user", "content": "What is the capital of France?"},
             {"role": "assistant", "content": "The capital of France is Paris."},
         ],
     }
-    val_data = {
-        chat_key: [
-            {"role": "user", "content": "What is the capital of Germany?"},
-            {"role": "assistant", "content": "The capital of Germany is Berlin."},
-        ],
-    }
 
     if system_key is not None:
-        train_data[system_key] = "You are a helpful assistant."
-    if system_key is not None:
-        val_data[system_key] = "You are a helpful assistant."
+        data[system_key] = "You are a helpful assistant."
 
     # Create temporary files for train and validation data
-    with tempfile.NamedTemporaryFile(
-        mode="w", suffix=".json", delete=False
-    ) as train_file:
-        json.dump(train_data, train_file)
-        train_path = train_file.name
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
+        json.dump(data, f)
+        data_path = f.name
+
+    return data_path
 
-    with tempfile.NamedTemporaryFile(
-        mode="w", suffix=".json", delete=False
-    ) as val_file:
-        json.dump(val_data, val_file)
-        val_path = val_file.name
 
-    return train_path, val_path
+@pytest.fixture(scope="function")
+def tokenizer():
+    """Return the tokenizer built by get_tokenizer for Qwen/Qwen3-0.6B."""
+    tokenizer_config = {"name": "Qwen/Qwen3-0.6B"}
+    return get_tokenizer(tokenizer_config)
 
 
 @pytest.mark.parametrize("sample_data", [("messages", None)], indirect=True)
 def test_dataset_initialization(sample_data):
-    train_path, val_path = sample_data
-    dataset = OpenAIFormatDataset(train_path, val_path)
+    data_path = sample_data
+    data_config = {
+        "dataset_name": "openai_format",
+        "data_path": data_path,
+    }
+    dataset = load_response_dataset(data_config)
 
     assert dataset.chat_key == "messages"
-    assert "train" in dataset.formatted_ds
-    assert "validation" in dataset.formatted_ds
+    assert len(dataset.dataset) == 1
 
 
 @pytest.mark.parametrize("sample_data", [("conversations", None)], indirect=True)
 def test_custom_keys(sample_data):
-    train_path, val_path = sample_data
-    dataset = OpenAIFormatDataset(
-        train_path,
-        val_path,
-        chat_key="conversations",
-        system_prompt="You are a helpful assistant.",
-    )
+    data_path = sample_data
+    data_config = {
+        "dataset_name": "openai_format",
+        "data_path": data_path,
+        "chat_key": "conversations",
+        "system_prompt": "You are a helpful assistant.",
+    }
+    dataset = load_response_dataset(data_config)
 
     assert dataset.chat_key == "conversations"
     assert dataset.system_prompt == "You are a helpful assistant."
 
 
-@pytest.mark.hf_gated
 @pytest.mark.parametrize("sample_data", [("messages", "system_key")], indirect=True)
-def test_message_formatting(sample_data):
-    train_path, val_path = sample_data
+def test_message_formatting(sample_data, tokenizer):
+    # load the dataset
+    data_path = sample_data
     dataset = OpenAIFormatDataset(
-        train_path, val_path, chat_key="messages", system_key="system_key"
+        data_path,
+        chat_key="messages",
+        system_key="system_key",
     )
 
-    first_example = dataset.formatted_ds["train"][0]
+    # check the first example
+    first_example = dataset.dataset[0]
 
+    assert "task_name" in first_example
     assert first_example["messages"][0]["role"] == "system"
     assert first_example["messages"][0]["content"] == "You are a helpful assistant."
     assert first_example["messages"][1]["role"] == "user"
@@ -102,9 +102,8 @@ def test_message_formatting(sample_data):
     assert first_example["messages"][2]["role"] == "assistant"
     assert first_example["messages"][2]["content"] == "The capital of France is Paris."
 
+    # check the combined message
     chat_template = COMMON_CHAT_TEMPLATES.passthrough_prompt_response
-    tokenizer = AutoTokenizer.from_pretrained("Meta-Llama/Meta-Llama-3-8B-Instruct")
-
     combined_message = tokenizer.apply_chat_template(
         first_example["messages"],
         chat_template=chat_template,
diff --git a/tests/unit/data/datasets/test_response_dataset.py b/tests/unit/data/datasets/test_response_dataset.py
index 22bc7168fe..23c7923066 100644
--- a/tests/unit/data/datasets/test_response_dataset.py
+++ b/tests/unit/data/datasets/test_response_dataset.py
@@ -16,100 +16,155 @@
 import tempfile
 
 import pytest
-from transformers import AutoTokenizer
+from datasets import Dataset
 
-from nemo_rl.data.chat_templates import COMMON_CHAT_TEMPLATES
+from nemo_rl.algorithms.utils import get_tokenizer
 from nemo_rl.data.datasets import load_response_dataset
+from nemo_rl.data.datasets.response_datasets.clevr import format_clevr_cogent_dataset
+from nemo_rl.data.datasets.response_datasets.geometry3k import format_geometry3k_dataset
 
 
-@pytest.fixture
-def sample_data(request):
-    input_key = request.param[0]
-    output_key = request.param[1]
-
-    train_data = [
+def create_sample_data(input_key, output_key, is_save_to_disk=False):
+    data = [
         {input_key: "Hello", output_key: "Hi there!"},
         {input_key: "How are you?", output_key: "I'm good, thanks!"},
     ]
-    val_data = [
-        {input_key: "What's up?", output_key: "Not much!"},
-        {input_key: "Bye", output_key: "Goodbye!"},
-    ]
 
-    # Create temporary files for train and validation data
-    with tempfile.NamedTemporaryFile(
-        mode="w", suffix=".json", delete=False
-    ) as train_file:
-        json.dump(train_data, train_file)
-        train_path = train_file.name
+    # Create temporary dataset (mkdtemp: mktemp is deprecated and racy)
+    if is_save_to_disk:
+        data_path = tempfile.mkdtemp()
+        dataset = Dataset.from_list(data)
+        dataset.save_to_disk(data_path)
+    else:
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
+            json.dump(data, f)
+            data_path = f.name
 
-    with tempfile.NamedTemporaryFile(
-        mode="w", suffix=".json", delete=False
-    ) as val_file:
-        json.dump(val_data, val_file)
-        val_path = val_file.name
+    return data_path
 
-    return train_path, val_path
 
+@pytest.fixture(scope="function")
+def tokenizer():
+    """Initialize tokenizer for the test model."""
+    tokenizer = get_tokenizer({"name": "Qwen/Qwen3-0.6B"})
+    return tokenizer
 
-@pytest.mark.parametrize("sample_data", [("input", "output")], indirect=True)
-def test_dataset_initialization(sample_data):
+
+@pytest.mark.parametrize(
+    "input_key,output_key", [("input", "output"), ("question", "answer")]
+)
+@pytest.mark.parametrize("is_save_to_disk", [True, False])
+def test_response_dataset(input_key, output_key, is_save_to_disk, tokenizer):
     # load the dataset
-    train_path, val_path = sample_data
+    data_path = create_sample_data(input_key, output_key, is_save_to_disk)
     data_config = {
         "dataset_name": "ResponseDataset",
-        "train_data_path": train_path,
-        "val_data_path": val_path,
+        "data_path": data_path,
+        "input_key": input_key,
+        "output_key": output_key,
     }
     dataset = load_response_dataset(data_config)
 
-    assert dataset.input_key == "input"
-    assert dataset.output_key == "output"
-    assert "train" in dataset.formatted_ds
-    assert "validation" in dataset.formatted_ds
+    # check the input and output keys
+    assert dataset.input_key == input_key
+    assert dataset.output_key == output_key
+
+    # check the first example
+    first_example = dataset.dataset[0]
+
+    # only contains messages and task_name
+    assert len(first_example.keys()) == 2
+    assert "messages" in first_example
+    assert "task_name" in first_example
+
+    # check the combined message
+    chat_template = "{% for message in messages %}{%- if message['role'] == 'system'  %}{{'Context: ' + message['content'].strip()}}{%- elif message['role'] == 'user'  %}{{' Question: ' + message['content'].strip() + ' Answer:'}}{%- elif message['role'] == 'assistant'  %}{{' ' + message['content'].strip()}}{%- endif %}{% endfor %}"
+    combined_message = tokenizer.apply_chat_template(
+        first_example["messages"],
+        chat_template=chat_template,
+        tokenize=False,
+        add_generation_prompt=False,
+        add_special_tokens=False,
+    )
+    assert combined_message == " Question: Hello Answer: Hi there!"
 
 
-@pytest.mark.parametrize("sample_data", [("question", "answer")], indirect=True)
-def test_custom_keys(sample_data):
+def test_helpsteer3_dataset():
     # load the dataset
-    train_path, val_path = sample_data
-    data_config = {
-        "dataset_name": "ResponseDataset",
-        "train_data_path": train_path,
-        "val_data_path": val_path,
-        "input_key": "question",
-        "output_key": "answer",
-    }
+    data_config = {"dataset_name": "HelpSteer3"}
     dataset = load_response_dataset(data_config)
 
-    assert dataset.input_key == "question"
-    assert dataset.output_key == "answer"
+    # check the first example
+    first_example = dataset.dataset[0]
+
+    # only contains context, response and task_name
+    assert len(first_example.keys()) == 3
+    assert "context" in first_example
+    assert "response" in first_example
+    assert "task_name" in first_example
+
+    # check the content
+    assert len(first_example["context"]) == 7
+    assert first_example["response"][0]["role"] == "assistant"
+    assert first_example["response"][0]["content"][:20] == "Yes, you are correct"
 
 
-@pytest.mark.hf_gated
-@pytest.mark.parametrize("sample_data", [("question", "answer")], indirect=True)
-def test_message_formatting(sample_data):
+def test_open_assistant_dataset():
     # load the dataset
-    train_path, val_path = sample_data
     data_config = {
-        "dataset_name": "ResponseDataset",
-        "train_data_path": train_path,
-        "val_data_path": val_path,
-        "input_key": "question",
-        "output_key": "answer",
+        "dataset_name": "open_assistant",
+        "split_validation_size": 0.05,
     }
     dataset = load_response_dataset(data_config)
 
-    first_example = dataset.formatted_ds["train"][0]
+    # check the first example
+    first_example = dataset.dataset[0]
+    first_val_example = dataset.val_dataset[0]
 
-    assert first_example["messages"][0]["role"] == "user"
-    assert first_example["messages"][0]["content"] == "Hello"
-    assert first_example["messages"][1]["role"] == "assistant"
-    assert first_example["messages"][1]["content"] == "Hi there!"
+    # only contains messages and task_name
+    assert len(first_example.keys()) == 2
+    assert "messages" in first_example
+    assert "task_name" in first_example
 
-    chat_template = COMMON_CHAT_TEMPLATES.passthrough_prompt_response
-    tokenizer = AutoTokenizer.from_pretrained("Meta-Llama/Meta-Llama-3-8B-Instruct")
+    # check the content
+    assert first_example["messages"][-1]["content"][:20] == "```\n    def forward("
+    assert len(first_example["messages"]) == 7
+    assert first_val_example["messages"][-1]["content"][:20] == "The colors you shoul"
+    assert len(first_val_example["messages"]) == 5
 
+
+@pytest.mark.parametrize(
+    "dataset_name",
+    ["DAPOMath17K", "DAPOMathAIME2024", "DeepScaler", "AIME2024", "squad"],
+)
+def test_build_in_dataset(dataset_name, tokenizer):
+    # load the dataset
+    data_config = {"dataset_name": dataset_name}
+    dataset = load_response_dataset(data_config)
+
+    # check the first example
+    first_example = dataset.dataset[0]
+
+    # only contains messages and task_name
+    assert len(first_example.keys()) == 2
+    assert "messages" in first_example
+    assert "task_name" in first_example
+
+    # check the content
+    if dataset_name == "DAPOMath17K":
+        assert first_example["messages"][1]["content"] == "34"
+    elif dataset_name == "DAPOMathAIME2024":
+        assert first_example["messages"][1]["content"] == "540"
+    elif dataset_name == "DeepScaler":
+        assert first_example["messages"][1]["content"] == "-\\frac{2}{3}"
+    elif dataset_name == "AIME2024":
+        assert first_example["messages"][1]["content"] == "204"
+        assert len(dataset.dataset) == 480
+    elif dataset_name == "squad":
+        assert first_example["messages"][2]["content"] == "Saint Bernadette Soubirous"
+
+    # check the combined message
+    chat_template = "{% for message in messages %}{%- if message['role'] == 'system'  %}{{'Context: ' + message['content'].strip()}}{%- elif message['role'] == 'user'  %}{{' Question: ' + message['content'].strip() + ' Answer:'}}{%- elif message['role'] == 'assistant'  %}{{' ' + message['content'].strip()}}{%- endif %}{% endfor %}"
     combined_message = tokenizer.apply_chat_template(
         first_example["messages"],
         chat_template=chat_template,
@@ -118,122 +173,112 @@ def test_message_formatting(sample_data):
         add_special_tokens=False,
     )
 
-    assert combined_message == "".join(
-        message["content"] for message in first_example["messages"]
-    )
+    if dataset_name == "squad":
+        assert combined_message == (
+            "Context: "
+            + first_example["messages"][0]["content"]
+            + " Question: "
+            + first_example["messages"][1]["content"]
+            + " Answer: "
+            + first_example["messages"][2]["content"]
+        )
+    else:
+        assert combined_message == (
+            " Question: "
+            + first_example["messages"][0]["content"]
+            + " Answer: "
+            + first_example["messages"][1]["content"]
+        )
 
 
-@pytest.mark.hf_gated
-@pytest.mark.skip(reason="dataset download is flaky")
-def test_squad_dataset():
+@pytest.mark.parametrize(
+    "dataset_name,output_key",
+    [
+        ("OpenMathInstruct-2", "expected_answer"),
+        ("OpenMathInstruct-2", "generated_solution"),
+        ("tulu3_sft_mixture", None),
+    ],
+)
+def test_build_in_dataset_with_split_validation(dataset_name, output_key, tokenizer):
     # load the dataset
     data_config = {
-        "dataset_name": "squad",
-        "prompt_file": None,
-        "system_prompt_file": None,
+        "dataset_name": dataset_name,
+        "output_key": output_key,
+        "split_validation_size": 0.05,
     }
-    squad_dataset = load_response_dataset(data_config)
-
-    # load the tokenizer
-    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
+    dataset = load_response_dataset(data_config)
 
-    # check that the dataset is formatted correctly
-    for example in squad_dataset.formatted_ds["train"].take(5):
-        assert "messages" in example
-        assert len(example["messages"]) == 3
+    # check the first example
+    first_example = dataset.dataset[0]
+    first_val_example = dataset.val_dataset[0]
+
+    # only contains messages and task_name
+    assert len(first_example.keys()) == 2
+    assert "messages" in first_example
+    assert "task_name" in first_example
+
+    # check the content
+    if dataset_name == "OpenMathInstruct-2":
+        if output_key == "expected_answer":
+            assert first_example["messages"][1]["content"] == "\\frac{8\\sqrt{3}}{3}"
+        elif output_key == "generated_solution":
+            assert (
+                first_example["messages"][1]["content"][:20] == "Let's denote the poi"
+            )
+    elif dataset_name == "tulu3_sft_mixture":
+        assert first_example["messages"][1]["content"][:20] == "I'm sorry, but I can"
+
+    # check the combined message
+    messages = [first_example["messages"], first_val_example["messages"]]
+    chat_template = "{% for message in messages %}{%- if message['role'] == 'system'  %}{{'Context: ' + message['content'].strip()}}{%- elif message['role'] == 'user'  %}{{' Question: ' + message['content'].strip() + ' Answer:'}}{%- elif message['role'] == 'assistant'  %}{{' ' + message['content'].strip()}}{%- endif %}{% endfor %}"
+    combined_message = tokenizer.apply_chat_template(
+        messages,
+        chat_template=chat_template,
+        tokenize=False,
+        add_generation_prompt=False,
+        add_special_tokens=False,
+    )
 
-        assert example["messages"][0]["role"] == "system"
-        assert example["messages"][1]["role"] == "user"
-        assert example["messages"][2]["role"] == "assistant"
+    for i in range(2):
+        assert combined_message[i] == (
+            " Question: "
+            + messages[i][0]["content"]
+            + " Answer: "
+            + messages[i][1]["content"]
+        )
 
-        template = "{% for message in messages %}{%- if message['role'] == 'system'  %}{{'Context: ' + message['content'].strip()}}{%- elif message['role'] == 'user'  %}{{' Question: ' + message['content'].strip() + ' Answer:'}}{%- elif message['role'] == 'assistant'  %}{{' ' + message['content'].strip()}}{%- endif %}{% endfor %}"
 
-        ## check that applying chat template works as expected
-        default_templated = tokenizer.apply_chat_template(
-            example["messages"],
-            chat_template=template,
-            tokenize=False,
-            add_generation_prompt=False,
-            add_special_tokens=False,
-        )
+@pytest.mark.parametrize(
+    "dataset_name,format_func",
+    [
+        ("clevr-cogent", format_clevr_cogent_dataset),
+        ("geometry3k", format_geometry3k_dataset),
+        # ("refcoco", format_refcoco_dataset),  # needs a 13.5G image download
+    ],
+)
+def test_vlm_dataset(dataset_name, format_func):
+    # load the dataset
+    data_config = {"dataset_name": dataset_name}
+    dataset = load_response_dataset(data_config)
 
-        assert default_templated == (
-            "Context: "
-            + example["messages"][0]["content"]
-            + " Question: "
-            + example["messages"][1]["content"]
-            + " Answer: "
-            + example["messages"][2]["content"]
-        )
+    # check the first example
+    first_example = dataset.dataset[0]
+    first_example = format_func(first_example)
 
+    # only contains messages and task_name
+    assert len(first_example.keys()) == 2
+    assert "messages" in first_example
+    assert "task_name" in first_example
 
-def test_load_dataset_saved_with_save_to_disk():
-    """Test loading a dataset that was saved using HuggingFace's save_to_disk().
-
-    This tests the fix for datasets that already have a 'messages' column,
-    which should be preserved without applying add_messages_key again.
-    """
-    from datasets import Dataset
-
-    # Create a dataset with 'messages' column already present
-    train_data = [
-        {
-            "messages": [
-                {"role": "user", "content": "What is 2+2?"},
-                {"role": "assistant", "content": "4"},
-            ]
-        },
-        {
-            "messages": [
-                {"role": "user", "content": "What is the capital of France?"},
-                {"role": "assistant", "content": "Paris"},
-            ]
-        },
-    ]
-    val_data = [
-        {
-            "messages": [
-                {"role": "user", "content": "What is 3+3?"},
-                {"role": "assistant", "content": "6"},
-            ]
-        },
-    ]
+    # check the content
+    assert first_example["messages"][0]["role"] == "user"
+    assert first_example["messages"][0]["content"][0]["type"] == "image"
+    assert first_example["messages"][0]["content"][1]["type"] == "text"
+    assert first_example["messages"][1]["role"] == "assistant"
 
-    with tempfile.TemporaryDirectory() as tmpdir:
-        # Create HF datasets and save using save_to_disk
-        train_dataset = Dataset.from_list(train_data)
-        val_dataset = Dataset.from_list(val_data)
-
-        train_path = f"{tmpdir}/train"
-        val_path = f"{tmpdir}/val"
-
-        train_dataset.save_to_disk(train_path)
-        val_dataset.save_to_disk(val_path)
-
-        # Load using load_response_dataset
-        data_config = {
-            "dataset_name": "ResponseDataset",
-            "train_data_path": train_path,
-            "val_data_path": val_path,
-        }
-        dataset = load_response_dataset(data_config)
-
-        # Verify the dataset loaded correctly
-        assert "train" in dataset.formatted_ds
-        assert "validation" in dataset.formatted_ds
-        assert len(dataset.formatted_ds["train"]) == 2
-        assert len(dataset.formatted_ds["validation"]) == 1
-
-        # Verify messages are preserved correctly
-        first_train_example = dataset.formatted_ds["train"][0]
-        assert "messages" in first_train_example
-        assert len(first_train_example["messages"]) == 2
-        assert first_train_example["messages"][0]["role"] == "user"
-        assert first_train_example["messages"][0]["content"] == "What is 2+2?"
-        assert first_train_example["messages"][1]["role"] == "assistant"
-        assert first_train_example["messages"][1]["content"] == "4"
-
-        # Verify validation data
-        first_val_example = dataset.formatted_ds["validation"][0]
-        assert first_val_example["messages"][0]["content"] == "What is 3+3?"
-        assert first_val_example["messages"][1]["content"] == "6"
+    if dataset_name == "clevr-cogent":
+        assert first_example["messages"][1]["content"] == "3"
+    elif dataset_name == "geometry3k":
+        assert first_example["messages"][1]["content"] == "3"
+    elif dataset_name == "refcoco":
+        assert first_example["messages"][1]["content"] == "[243, 469, 558, 746]"
diff --git a/tests/unit/data/test_data_processor.py b/tests/unit/data/test_data_processor.py
index 7e2fa903f8..343bbe30bb 100644
--- a/tests/unit/data/test_data_processor.py
+++ b/tests/unit/data/test_data_processor.py
@@ -146,7 +146,7 @@ def test_math_hf_data_processor(tokenizer_name, dataset_cls):
     task_data_processors[task_name] = (math_task_spec, math_hf_data_processor)
 
     dataset = AllTaskProcessedDataset(
-        dataset=data.formatted_ds["train"],
+        dataset=data.dataset,
         tokenizer=tokenizer,
         default_task_data_spec=math_task_spec,
         task_data_processors=task_data_processors,
diff --git a/tests/unit/data/test_data_shuffle_reproducity.py b/tests/unit/data/test_data_shuffle_reproducity.py
index a918648dc6..4074e0d0fa 100644
--- a/tests/unit/data/test_data_shuffle_reproducity.py
+++ b/tests/unit/data/test_data_shuffle_reproducity.py
@@ -63,7 +63,7 @@ def create_dataloader(
     task_data_processors[task_name] = (math_task_spec, math_hf_data_processor)
 
     dataset = AllTaskProcessedDataset(
-        dataset=data.formatted_ds["train"].select(range(1000)),
+        dataset=data.dataset.select(range(1000)),
         tokenizer=tokenizer,
         default_task_data_spec=math_task_spec,
         task_data_processors=task_data_processors,
diff --git a/tests/unit/environments/test_code_jaccard_environment.py b/tests/unit/environments/test_code_jaccard_environment.py
index f2af133585..0880fcc6f6 100644
--- a/tests/unit/environments/test_code_jaccard_environment.py
+++ b/tests/unit/environments/test_code_jaccard_environment.py
@@ -28,7 +28,7 @@ def code_jaccard_env_config():
 
 @pytest.fixture(scope="module")
 def code_jaccard_env(code_jaccard_env_config):
-    env = create_env("code_jaccard", {"code_jaccard": code_jaccard_env_config})
+    env = create_env("code_jaccard", code_jaccard_env_config)
     yield env
     env.shutdown.remote()
     ray.kill(env)
diff --git a/tests/unit/models/automodel/__init__.py b/tests/unit/models/automodel/__init__.py
new file mode 100644
index 0000000000..341a77c5bc
--- /dev/null
+++ b/tests/unit/models/automodel/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tests/unit/models/automodel/test_automodel_setup.py b/tests/unit/models/automodel/test_automodel_setup.py
new file mode 100644
index 0000000000..619a71dc1b
--- /dev/null
+++ b/tests/unit/models/automodel/test_automodel_setup.py
@@ -0,0 +1,1502 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Unit tests for automodel setup utilities."""
+
+import os
+from unittest.mock import MagicMock, Mock, patch
+
+import pytest
+
+pytest_plugins = []
+try:
+    import nemo_automodel  # noqa: F401
+except ImportError:
+    pytest.skip("nemo_automodel not available", allow_module_level=True)
+
+import torch
+
+from nemo_rl.models.automodel.setup import (
+    ModelAndOptimizerState,
+    RuntimeConfig,
+    setup_distributed,
+    setup_model_and_optimizer,
+    setup_reference_model_state,
+    validate_and_prepare_config,
+)
+
+
+@pytest.fixture
+def mock_config():
+    """Create a mock policy configuration for testing."""
+    return {
+        "model_name": "gpt2",
+        "precision": "bfloat16",
+        "max_grad_norm": 1.0,
+        "offload_optimizer_for_logprob": False,
+        "sequence_packing": {"enabled": False},
+        "dtensor_cfg": {
+            "cpu_offload": False,
+            "context_parallel_size": 1,
+            "tensor_parallel_size": 1,
+            "expert_parallel_size": 1,
+            "data_parallel_size": None,
+            "sequence_parallel": False,
+            "use_hf_tp_plan": False,
+            "activation_checkpointing": False,
+        },
+        "generation": None,
+        "hf_config_overrides": {},
+        "optimizer": {
+            "name": "torch.optim.AdamW",
+            "kwargs": {"lr": 1e-4},
+        },
+    }
+
+
+@pytest.fixture
+def mock_autoconfig():
+    """Create a mock AutoConfig for testing."""
+    config = MagicMock()
+    config.architectures = ["GPT2LMHeadModel"]
+    config.model_type = "gpt2"
+    config.num_labels = 2
+    config.torch_dtype = "float32"
+    return config
+
+
+@pytest.mark.automodel
+class TestValidateAndPrepareConfig:
+    """Test suite for validate_and_prepare_config function."""
+
+    @patch("nemo_rl.models.automodel.setup.AutoConfig")
+    @patch("nemo_rl.models.automodel.setup.resolve_model_class")
+    @patch("nemo_rl.models.automodel.setup.configure_dynamo_cache")
+    def test_basic_validation(
+        self,
+        mock_dynamo,
+        mock_resolve_class,
+        mock_autoconfig_class,
+        mock_config,
+        mock_autoconfig,
+    ):
+        """Test basic configuration validation returns correct values."""
+        mock_autoconfig_class.from_pretrained.return_value = mock_autoconfig
+        mock_resolve_class.return_value = Mock
+
+        result = validate_and_prepare_config(
+            config=mock_config,
+            processor=None,
+            rank=0,
+        )
+
+        # Verify result is a RuntimeConfig named tuple
+        assert isinstance(result, RuntimeConfig)
+        assert result.dtype == torch.bfloat16
+        assert result.cpu_offload is False
+        assert result.offload_optimizer_for_logprob is False
+        assert result.max_grad_norm == 1.0
+        assert result.enable_seq_packing is False
+        assert result.model_class is not None
+        assert result.model_config is not None
+        assert isinstance(result.allow_flash_attn_args, bool)
+
+    @patch("nemo_rl.models.automodel.setup.AutoConfig")
+    @patch("nemo_rl.models.automodel.setup.resolve_model_class")
+    @patch("nemo_rl.models.automodel.setup.configure_dynamo_cache")
+    def test_precision_validation_invalid(
+        self,
+        mock_dynamo,
+        mock_resolve_class,
+        mock_autoconfig_class,
+        mock_config,
+    ):
+        """Test that invalid precision raises ValueError."""
+        mock_config["precision"] = "invalid_precision"
+
+        with pytest.raises(ValueError, match="Unknown precision"):
+            validate_and_prepare_config(
+                config=mock_config,
+                processor=None,
+                rank=0,
+            )
+
+    @patch("nemo_rl.models.automodel.setup.AutoConfig")
+    @patch("nemo_rl.models.automodel.setup.resolve_model_class")
+    @patch("nemo_rl.models.automodel.setup.configure_dynamo_cache")
+    def test_sequence_packing_with_vlm_raises_error(
+        self,
+        mock_dynamo,
+        mock_resolve_class,
+        mock_autoconfig_class,
+        mock_config,
+    ):
+        """Test that sequence packing with VLM raises ValueError."""
+        mock_config["sequence_packing"]["enabled"] = True
+        processor = MagicMock()
+
+        with pytest.raises(
+            ValueError, match="Sequence packing is not supported for VLM"
+        ):
+            validate_and_prepare_config(
+                config=mock_config,
+                processor=processor,
+                rank=0,
+            )
+
+    @patch("nemo_rl.models.automodel.setup.AutoConfig")
+    @patch("nemo_rl.models.automodel.setup.resolve_model_class")
+    @patch("nemo_rl.models.automodel.setup.configure_dynamo_cache")
+    @patch("nemo_rl.models.automodel.setup.NeMoAutoModelForSequenceClassification")
+    def test_reward_model_bradley_terry(
+        self,
+        mock_rm_class,
+        mock_dynamo,
+        mock_resolve_class,
+        mock_autoconfig_class,
+        mock_config,
+        mock_autoconfig,
+    ):
+        """Test reward model configuration with Bradley-Terry type."""
+        mock_autoconfig_class.from_pretrained.return_value = mock_autoconfig
+
+        mock_config["reward_model_cfg"] = {
+            "enabled": True,
+            "reward_model_type": "bradley_terry",
+        }
+
+        result = validate_and_prepare_config(
+            config=mock_config,
+            processor=None,
+            rank=0,
+        )
+
+        # Verify num_labels was set to 1 for bradley_terry reward model
+        assert mock_autoconfig.num_labels == 1
+        # Result should be valid RuntimeConfig
+        assert isinstance(result, RuntimeConfig)
+        assert result.is_reward_model is True
+
+    @patch("nemo_rl.models.automodel.setup.AutoConfig")
+    @patch("nemo_rl.models.automodel.setup.resolve_model_class")
+    @patch("nemo_rl.models.automodel.setup.configure_dynamo_cache")
+    def test_context_parallel_with_sequence_packing_raises_error(
+        self,
+        mock_dynamo,
+        mock_resolve_class,
+        mock_autoconfig_class,
+        mock_config,
+    ):
+        """Test that CP with sequence packing raises ValueError."""
+        mock_config["sequence_packing"]["enabled"] = True
+        mock_config["dtensor_cfg"]["context_parallel_size"] = 2
+
+        with pytest.raises(
+            ValueError, match="Context parallel is not supported for sequence packing"
+        ):
+            validate_and_prepare_config(
+                config=mock_config,
+                processor=None,
+                rank=0,
+            )
+
+    @patch("nemo_rl.models.automodel.setup.AutoConfig")
+    @patch("nemo_rl.models.automodel.setup.resolve_model_class")
+    @patch("nemo_rl.models.automodel.setup.configure_dynamo_cache")
+    def test_sequence_parallel_with_tp_size_one_prints_warning(
+        self,
+        mock_dynamo,
+        mock_resolve_class,
+        mock_autoconfig_class,
+        mock_config,
+        mock_autoconfig,
+        capsys,
+    ):
+        """Test that sequence parallel with tp = 1 prints a warning."""
+        mock_autoconfig_class.from_pretrained.return_value = mock_autoconfig
+        mock_resolve_class.return_value = Mock
+
+        mock_config["dtensor_cfg"]["sequence_parallel"] = True
+        mock_config["dtensor_cfg"]["tensor_parallel_size"] = 1
+
+        # Should not raise an error, just print a warning
+        result = validate_and_prepare_config(
+            config=mock_config,
+            processor=None,
+            rank=0,
+        )
+
+        # Verify result is valid
+        assert isinstance(result, RuntimeConfig)
+
+        # Check warning was printed
+        captured = capsys.readouterr()
+        assert (
+            "sequence_parallel=True, but tp_size=1 which has no effect" in captured.out
+        )
+
+    @patch("nemo_rl.models.automodel.setup.AutoConfig")
+    @patch("nemo_rl.models.automodel.setup.resolve_model_class")
+    @patch("nemo_rl.models.automodel.setup.configure_dynamo_cache")
+    def test_attention_implementation_selection(
+        self,
+        mock_dynamo,
+        mock_resolve_class,
+        mock_autoconfig_class,
+        mock_config,
+        mock_autoconfig,
+    ):
+        """Test attention implementation is selected correctly."""
+        mock_autoconfig_class.from_pretrained.return_value = mock_autoconfig
+        mock_resolve_class.return_value = Mock
+
+        # Test FA2 for sequence packing with cp=1
+        mock_config["sequence_packing"]["enabled"] = True
+        mock_config["dtensor_cfg"]["context_parallel_size"] = 1
+        result = validate_and_prepare_config(mock_config, None, 0)
+        assert result.attn_impl == "flash_attention_2"
+
+        # Test SDPA for cp > 1
+        mock_config["sequence_packing"]["enabled"] = False
+        mock_config["dtensor_cfg"]["context_parallel_size"] = 2
+        result = validate_and_prepare_config(mock_config, None, 0)
+        assert result.attn_impl == "sdpa"
+
+        # Test None for cp=1 without sequence packing
+        mock_config["dtensor_cfg"]["context_parallel_size"] = 1
+        result = validate_and_prepare_config(mock_config, None, 0)
+        assert result.attn_impl is None
+
+    @patch("nemo_rl.models.automodel.setup.AutoConfig")
+    @patch("nemo_rl.models.automodel.setup.resolve_model_class")
+    @patch("nemo_rl.models.automodel.setup.configure_dynamo_cache")
+    def test_precision_types(
+        self,
+        mock_dynamo,  # innermost @patch: configure_dynamo_cache
+        mock_resolve_class,
+        mock_autoconfig_class,  # outermost @patch: AutoConfig
+        mock_config,  # pytest fixture, not a patch
+        mock_autoconfig,  # pytest fixture, not a patch
+    ):
+        """Test all supported precision types."""
+        mock_autoconfig_class.from_pretrained.return_value = mock_autoconfig
+        mock_resolve_class.return_value = Mock
+
+        # The "float32" string must resolve to torch.float32
+        mock_config["precision"] = "float32"
+        result = validate_and_prepare_config(mock_config, None, 0)
+        assert result.dtype == torch.float32
+
+        # The "float16" string must resolve to torch.float16
+        mock_config["precision"] = "float16"
+        result = validate_and_prepare_config(mock_config, None, 0)
+        assert result.dtype == torch.float16
+
+        # The "bfloat16" string must resolve to torch.bfloat16
+        mock_config["precision"] = "bfloat16"
+        result = validate_and_prepare_config(mock_config, None, 0)
+        assert result.dtype == torch.bfloat16
+
+    @patch("nemo_rl.models.automodel.setup.AutoConfig")
+    @patch("nemo_rl.models.automodel.setup.resolve_model_class")
+    @patch("nemo_rl.models.automodel.setup.configure_dynamo_cache")
+    @patch.dict(os.environ, {}, clear=True)  # empty os.environ for this test only
+    def test_generation_colocated(
+        self,
+        mock_dynamo,  # patch.dict injects no argument; this is configure_dynamo_cache
+        mock_resolve_class,
+        mock_autoconfig_class,
+        mock_config,
+        mock_autoconfig,
+    ):
+        """Test generation colocated configuration."""
+        mock_autoconfig_class.from_pretrained.return_value = mock_autoconfig
+        mock_resolve_class.return_value = Mock
+
+        # Enable colocated generation in the config under test
+        mock_config["generation"] = {"colocated": {"enabled": True}}
+        result = validate_and_prepare_config(mock_config, None, 0)
+        assert result.is_generation_colocated is True
+        # The environment started empty, so the call must not have added NCCL_CUMEM_ENABLE
+        assert "NCCL_CUMEM_ENABLE" not in os.environ
+
+    @patch("nemo_rl.models.automodel.setup.AutoConfig")
+    @patch("nemo_rl.models.automodel.setup.resolve_model_class")
+    @patch("nemo_rl.models.automodel.setup.configure_dynamo_cache")
+    @patch.dict(os.environ, {}, clear=True)  # empty os.environ, restored after the test
+    def test_generation_not_colocated(
+        self,
+        mock_dynamo,  # patch.dict injects no argument; this is configure_dynamo_cache
+        mock_resolve_class,
+        mock_autoconfig_class,
+        mock_config,
+        mock_autoconfig,
+    ):
+        """Test generation not colocated sets NCCL environment variable."""
+        mock_autoconfig_class.from_pretrained.return_value = mock_autoconfig
+        mock_resolve_class.return_value = Mock
+
+        # Disable colocated generation in the config under test
+        mock_config["generation"] = {"colocated": {"enabled": False}}
+        result = validate_and_prepare_config(mock_config, None, 0)
+        assert result.is_generation_colocated is False
+        # The environment started empty, so the value "1" must come from the call above
+        assert os.environ.get("NCCL_CUMEM_ENABLE") == "1"
+
+    @patch("nemo_rl.models.automodel.setup.AutoConfig")
+    @patch("nemo_rl.models.automodel.setup.resolve_model_class")
+    @patch("nemo_rl.models.automodel.setup.configure_dynamo_cache")
+    def test_allow_flash_attn_args_nemotron_nas(
+        self,
+        mock_dynamo,
+        mock_resolve_class,
+        mock_autoconfig_class,
+        mock_config,  # pytest fixture; the mock_autoconfig fixture is not requested here
+    ):
+        """Test flash attention args disabled for Nemotron NAS."""
+        mock_autoconfig = MagicMock()  # local HF-config stand-in built for this test
+        mock_autoconfig.architectures = ["DeciLMForCausalLM"]
+        mock_autoconfig.model_type = "nemotron-nas"
+        mock_autoconfig.torch_dtype = "float32"
+        mock_autoconfig_class.from_pretrained.return_value = mock_autoconfig
+        mock_resolve_class.return_value = Mock
+
+        result = validate_and_prepare_config(mock_config, None, 0)
+        assert result.allow_flash_attn_args is False  # expected for the config built above
+
+    @patch("nemo_rl.models.automodel.setup.AutoConfig")
+    @patch("nemo_rl.models.automodel.setup.resolve_model_class")
+    @patch("nemo_rl.models.automodel.setup.configure_dynamo_cache")
+    def test_sequence_packing_with_reward_model_raises_error(
+        self,
+        mock_dynamo,
+        mock_resolve_class,  # patched but left unconfigured in this test
+        mock_autoconfig_class,
+        mock_config,
+        mock_autoconfig,
+    ):
+        """Test that sequence packing with reward model raises NotImplementedError."""
+        mock_autoconfig_class.from_pretrained.return_value = mock_autoconfig
+        mock_config["sequence_packing"]["enabled"] = True  # combined with the reward model below
+        mock_config["reward_model_cfg"] = {
+            "enabled": True,
+            "reward_model_type": "bradley_terry",
+        }
+
+        with pytest.raises(
+            NotImplementedError,
+            match="Sequence packing is not supported for reward models",  # regex search on the message
+        ):
+            validate_and_prepare_config(
+                config=mock_config,
+                processor=None,
+                rank=0,
+            )
+
+    @patch("nemo_rl.models.automodel.setup.AutoConfig")
+    @patch("nemo_rl.models.automodel.setup.resolve_model_class")
+    @patch("nemo_rl.models.automodel.setup.configure_dynamo_cache")
+    def test_unknown_reward_model_type_raises_error(
+        self,
+        mock_dynamo,
+        mock_resolve_class,  # patched but left unconfigured in this test
+        mock_autoconfig_class,
+        mock_config,
+        mock_autoconfig,
+    ):
+        """Test that unknown reward model type raises ValueError."""
+        mock_autoconfig_class.from_pretrained.return_value = mock_autoconfig
+        mock_config["reward_model_cfg"] = {
+            "enabled": True,
+            "reward_model_type": "unknown_type",  # deliberately unrecognised type name
+        }
+
+        with pytest.raises(ValueError, match="Unknown reward model type: unknown_type"):
+            validate_and_prepare_config(
+                config=mock_config,
+                processor=None,
+                rank=0,
+            )
+
+    @patch("nemo_rl.models.automodel.setup.AutoConfig")
+    @patch("nemo_rl.models.automodel.setup.resolve_model_class")
+    @patch("nemo_rl.models.automodel.setup.configure_dynamo_cache")
+    @patch("nemo_rl.models.automodel.setup.NeMoAutoModelForSequenceClassification")
+    def test_reward_model_bradley_terry_num_labels_already_one(
+        self,
+        mock_rm_class,  # innermost @patch: NeMoAutoModelForSequenceClassification
+        mock_dynamo,
+        mock_resolve_class,
+        mock_autoconfig_class,
+        mock_config,
+        capsys,  # pytest built-in fixture that captures stdout/stderr
+    ):
+        """Test reward model with num_labels already set to 1 does not print warning."""
+        mock_autoconfig = MagicMock()  # local HF-config stand-in with an explicit num_labels
+        mock_autoconfig.architectures = ["GPT2LMHeadModel"]
+        mock_autoconfig.model_type = "gpt2"
+        mock_autoconfig.num_labels = 1  # already 1, so no num_labels warning is expected
+        mock_autoconfig.torch_dtype = "float32"
+        mock_autoconfig_class.from_pretrained.return_value = mock_autoconfig
+
+        mock_config["reward_model_cfg"] = {
+            "enabled": True,
+            "reward_model_type": "bradley_terry",
+        }
+
+        result = validate_and_prepare_config(
+            config=mock_config,
+            processor=None,
+            rank=0,
+        )
+
+        # The num_labels warning text must be absent from captured stdout
+        captured = capsys.readouterr()
+        assert "model_config.num_labels is not 1" not in captured.out
+        assert result.is_reward_model is True
+
+    @patch("nemo_rl.models.automodel.setup.AutoConfig")
+    @patch("nemo_rl.models.automodel.setup.resolve_model_class")
+    @patch("nemo_rl.models.automodel.setup.configure_dynamo_cache")
+    def test_sequence_packing_enabled_prints_info(
+        self,
+        mock_dynamo,
+        mock_resolve_class,
+        mock_autoconfig_class,
+        mock_config,
+        mock_autoconfig,
+        capsys,  # pytest built-in fixture that captures stdout/stderr
+    ):
+        """Test that sequence packing enabled prints info messages."""
+        mock_autoconfig_class.from_pretrained.return_value = mock_autoconfig
+        mock_resolve_class.return_value = Mock
+
+        mock_config["sequence_packing"]["enabled"] = True
+        mock_config["dtensor_cfg"]["context_parallel_size"] = 1  # packing with cp=1
+
+        result = validate_and_prepare_config(
+            config=mock_config,
+            processor=None,
+            rank=0,  # the expected messages below carry the "[Rank 0]" prefix
+        )
+
+        captured = capsys.readouterr()  # both messages are expected on stdout
+        assert "[Rank 0] Sequence packing is enabled for model gpt2" in captured.out
+        assert "[Rank 0] Using FlashAttention2 for sequence packing" in captured.out
+        assert result.enable_seq_packing is True
+
+    @patch("nemo_rl.models.automodel.setup.AutoConfig")
+    @patch("nemo_rl.models.automodel.setup.resolve_model_class")
+    @patch("nemo_rl.models.automodel.setup.configure_dynamo_cache")
+    def test_hf_config_overrides_none_becomes_empty_dict(
+        self,
+        mock_dynamo,
+        mock_resolve_class,
+        mock_autoconfig_class,
+        mock_config,
+        mock_autoconfig,
+    ):
+        """Test that None hf_config_overrides becomes empty dict."""
+        mock_autoconfig_class.from_pretrained.return_value = mock_autoconfig
+        mock_resolve_class.return_value = Mock
+
+        mock_config["hf_config_overrides"] = None  # key present, value explicitly None
+
+        result = validate_and_prepare_config(
+            config=mock_config,
+            processor=None,
+            rank=0,
+        )
+
+        assert result.hf_config_overrides == {}  # None is expected to be normalised to {}
+
+    @patch("nemo_rl.models.automodel.setup.AutoConfig")
+    @patch("nemo_rl.models.automodel.setup.resolve_model_class")
+    @patch("nemo_rl.models.automodel.setup.configure_dynamo_cache")
+    def test_missing_hf_config_overrides_becomes_empty_dict(
+        self,
+        mock_dynamo,
+        mock_resolve_class,
+        mock_autoconfig_class,
+        mock_config,
+        mock_autoconfig,
+    ):
+        """Test that missing hf_config_overrides becomes empty dict."""
+        mock_autoconfig_class.from_pretrained.return_value = mock_autoconfig
+        mock_resolve_class.return_value = Mock
+
+        del mock_config["hf_config_overrides"]  # relies on the fixture defining this key
+
+        result = validate_and_prepare_config(
+            config=mock_config,
+            processor=None,
+            rank=0,
+        )
+
+        assert result.hf_config_overrides == {}  # an absent key is expected to default to {}
+
+
+@pytest.mark.automodel
+class TestSetupReferenceModelState:
+    """Test suite for setup_reference_model_state function."""
+
+    @patch("nemo_rl.models.automodel.setup.get_cpu_state_dict")
+    def test_setup_reference_model_state_calls_get_cpu_state_dict(
+        self, mock_get_cpu_state_dict
+    ):
+        """Test that setup_reference_model_state calls get_cpu_state_dict correctly."""
+        mock_model = MagicMock()
+        mock_state_dict = {
+            "weight1": torch.tensor([1.0]),
+            "weight2": torch.tensor([2.0]),
+        }
+        mock_model.state_dict.return_value = mock_state_dict
+        mock_get_cpu_state_dict.return_value = {"weight1": torch.tensor([1.0])}
+
+        result = setup_reference_model_state(mock_model)
+
+        mock_model.state_dict.assert_called_once()
+        mock_get_cpu_state_dict.assert_called_once()
+        # call_args[1] is the keyword-argument dict of the recorded call
+        call_kwargs = mock_get_cpu_state_dict.call_args[1]
+        assert call_kwargs["pin_memory"] is True
+        assert result == {"weight1": torch.tensor([1.0])}  # one-element tensors compare to a plain bool
+
+    @patch("nemo_rl.models.automodel.setup.get_cpu_state_dict")
+    def test_setup_reference_model_state_returns_dict(self, mock_get_cpu_state_dict):
+        """Test that setup_reference_model_state returns a dictionary."""
+        mock_model = MagicMock()
+        mock_model.state_dict.return_value = {}  # empty state dict as input
+        expected_result = {"param": torch.zeros(10)}
+        mock_get_cpu_state_dict.return_value = expected_result
+
+        result = setup_reference_model_state(mock_model)
+
+        assert result == expected_result  # the mocked helper's return value is what comes back
+
+
+@pytest.mark.automodel
+class TestSetupDistributed:
+    """Test suite for setup_distributed function."""
+
+    @pytest.fixture
+    def mock_runtime_config(self):
+        """Create a mock RuntimeConfig for testing."""
+        return RuntimeConfig(
+            model_class=Mock,
+            model_config=MagicMock(),
+            hf_config_overrides={},
+            allow_flash_attn_args=True,
+            attn_impl=None,
+            dtype=torch.bfloat16,
+            enable_seq_packing=False,
+            max_grad_norm=1.0,
+            cpu_offload=False,  # the offload-enabled case builds its own RuntimeConfig
+            offload_optimizer_for_logprob=False,
+            is_generation_colocated=None,
+            is_reward_model=False,
+        )
+
+    @patch("nemo_rl.models.automodel.setup.FSDP2Manager")
+    @patch("nemo_rl.models.automodel.setup.torch.distributed")
+    def test_setup_distributed_basic(
+        self, mock_torch_dist, mock_fsdp2_manager, mock_config, mock_runtime_config
+    ):
+        """Test basic distributed setup without CPU offload."""
+        mock_torch_dist.get_world_size.return_value = 8
+        mock_manager_instance = MagicMock()
+        mock_fsdp2_manager.return_value = mock_manager_instance  # FSDP2Manager(...) yields this
+
+        result = setup_distributed(mock_config, mock_runtime_config)
+
+        mock_torch_dist.init_process_group.assert_called_once_with(backend="nccl")
+        assert result == mock_manager_instance
+
+    @patch("nemo_rl.models.automodel.setup.FSDP2Manager")
+    @patch("nemo_rl.models.automodel.setup.torch.distributed")
+    def test_setup_distributed_with_cpu_offload(
+        self, mock_torch_dist, mock_fsdp2_manager, mock_config
+    ):
+        """Test distributed setup with CPU offload."""
+        mock_torch_dist.get_world_size.return_value = 4
+        mock_manager_instance = MagicMock()
+        mock_fsdp2_manager.return_value = mock_manager_instance
+
+        runtime_config = RuntimeConfig(
+            model_class=Mock,
+            model_config=MagicMock(),
+            hf_config_overrides={},
+            allow_flash_attn_args=True,
+            attn_impl=None,
+            dtype=torch.bfloat16,
+            enable_seq_packing=False,
+            max_grad_norm=1.0,
+            cpu_offload=True,  # the only field that differs from the class fixture
+            offload_optimizer_for_logprob=False,
+            is_generation_colocated=None,
+            is_reward_model=False,
+        )
+
+        result = setup_distributed(mock_config, runtime_config)
+
+        mock_torch_dist.init_process_group.assert_called_once_with(
+            backend="cuda:nccl,cpu:gloo"  # per-device backend string expected with offload
+        )
+        assert result == mock_manager_instance
+
+    @patch("nemo_rl.models.automodel.setup.FSDP2Manager")
+    @patch("nemo_rl.models.automodel.setup.torch.distributed")
+    def test_setup_distributed_world_size_one_calls_setup(
+        self, mock_torch_dist, mock_fsdp2_manager, mock_config, mock_runtime_config
+    ):
+        """Test that world_size=1 calls _setup_distributed on manager."""
+        mock_torch_dist.get_world_size.return_value = 1  # single-process case
+        mock_manager_instance = MagicMock()
+        mock_fsdp2_manager.return_value = mock_manager_instance
+
+        result = setup_distributed(mock_config, mock_runtime_config)
+
+        mock_manager_instance._setup_distributed.assert_called_once()
+        assert result == mock_manager_instance
+
+    @patch("nemo_rl.models.automodel.setup.FSDP2Manager")
+    @patch("nemo_rl.models.automodel.setup.torch.distributed")
+    def test_setup_distributed_passes_correct_params(
+        self, mock_torch_dist, mock_fsdp2_manager, mock_config, mock_runtime_config
+    ):
+        """Test that FSDP2Manager is initialized with correct parameters."""
+        mock_torch_dist.get_world_size.return_value = 4
+
+        setup_distributed(mock_config, mock_runtime_config)
+
+        call_kwargs = mock_fsdp2_manager.call_args[1]  # kwargs of the FSDP2Manager(...) call
+        assert call_kwargs["dp_size"] is None
+        assert call_kwargs["dp_replicate_size"] == 1
+        assert call_kwargs["tp_size"] == 1
+        assert call_kwargs["cp_size"] == 1
+        assert call_kwargs["ep_size"] == 1
+        assert call_kwargs["pp_size"] == 1
+        assert call_kwargs["sequence_parallel"] is False
+        assert call_kwargs["backend"] == "nccl"
+        assert call_kwargs["world_size"] == 4  # equals the mocked get_world_size() value
+        assert call_kwargs["activation_checkpointing"] is False
+
+
+@pytest.mark.automodel
+class TestSetupModelAndOptimizer:
+    """Test suite for setup_model_and_optimizer function."""
+
+    @pytest.fixture
+    def mock_runtime_config(self, mock_autoconfig):
+        """Create a mock RuntimeConfig for testing."""
+        return RuntimeConfig(
+            model_class=MagicMock(),  # tests set from_pretrained.return_value on this
+            model_config=mock_autoconfig,  # shared fixture; tests set .architectures on it
+            hf_config_overrides={},
+            allow_flash_attn_args=True,
+            attn_impl=None,
+            dtype=torch.bfloat16,
+            enable_seq_packing=False,
+            max_grad_norm=1.0,
+            cpu_offload=False,
+            offload_optimizer_for_logprob=False,
+            is_generation_colocated=None,
+            is_reward_model=False,
+        )
+
+    @pytest.fixture
+    def mock_distributed_manager(self):
+        """Create a mock FSDP2Manager for testing."""
+        manager = MagicMock()
+        manager.device_mesh = MagicMock()
+        manager.device_mesh.mesh_dim_names = ["dp_shard_cp"]
+        manager.moe_mesh = MagicMock()
+        manager.tp_size = 1  # default: no tensor parallelism; tests override as needed
+        manager.cp_size = 1  # default: no context parallelism; tests override as needed
+        manager.sequence_parallel = False
+        return manager
+
+    @pytest.fixture
+    def mock_checkpoint_manager(self):
+        """Create a mock checkpoint manager for testing."""
+        return MagicMock()  # bare mock; tests assert on the calls it records
+
+    @pytest.fixture
+    def mock_tokenizer(self):
+        """Create a mock tokenizer for testing."""
+        tokenizer = MagicMock()
+        tokenizer.pad_token_id = 0  # default pad id; individual tests may override it
+        return tokenizer
+
+    @patch("nemo_rl.models.automodel.setup.torch.optim.lr_scheduler.LambdaLR")
+    @patch("nemo_rl.models.automodel.setup.torch.distributed.get_rank")
+    @patch("nemo_rl.models.automodel.setup.init_empty_weights")
+    @patch("nemo_rl.models.automodel.setup.get_class")
+    def test_setup_model_and_optimizer_basic(
+        self,
+        mock_get_class,  # innermost @patch: get_class
+        mock_init_empty_weights,
+        mock_get_rank,
+        mock_lambda_lr,  # outermost @patch: LambdaLR
+        mock_config,
+        mock_runtime_config,
+        mock_distributed_manager,
+        mock_checkpoint_manager,
+        mock_tokenizer,
+    ):
+        """Test basic model and optimizer setup."""
+        mock_get_rank.return_value = 0
+        mock_lambda_lr.return_value = MagicMock()
+
+        # Model stand-in returned by both from_pretrained and parallelize
+        mock_model = MagicMock()
+        mock_model.state_dict.return_value = {"layer.weight": torch.zeros(10)}
+        mock_model.config = MagicMock()
+        mock_model.config.pad_token_id = None
+        mock_runtime_config.model_class.from_pretrained.return_value = mock_model
+        mock_runtime_config.model_config.architectures = ["GPT2LMHeadModel"]
+        mock_distributed_manager.parallelize.return_value = mock_model
+
+        # get_class(...) returns a factory whose call yields the optimizer mock
+        mock_optimizer = MagicMock()
+        mock_get_class.return_value = MagicMock(return_value=mock_optimizer)
+
+        result = setup_model_and_optimizer(
+            config=mock_config,
+            tokenizer=mock_tokenizer,
+            runtime_config=mock_runtime_config,
+            distributed_manager=mock_distributed_manager,
+            checkpoint_manager=mock_checkpoint_manager,
+            is_vlm=False,
+            init_optimizer=True,
+        )
+
+        assert isinstance(result, ModelAndOptimizerState)
+        mock_checkpoint_manager.set_model_state_dict_keys.assert_called_once()
+        mock_checkpoint_manager.load_base_model.assert_called_once()
+
+    @patch("nemo_rl.models.automodel.setup.torch.distributed.get_rank")
+    @patch("nemo_rl.models.automodel.setup.init_empty_weights")
+    @patch("nemo_rl.models.automodel.setup.get_class")
+    def test_setup_model_and_optimizer_no_optimizer(
+        self,
+        mock_get_class,  # patched but left unconfigured in this test
+        mock_init_empty_weights,
+        mock_get_rank,
+        mock_config,
+        mock_runtime_config,
+        mock_distributed_manager,
+        mock_checkpoint_manager,
+        mock_tokenizer,
+    ):
+        """Test model setup without optimizer initialization."""
+        mock_get_rank.return_value = 0
+
+        mock_model = MagicMock()  # returned by both from_pretrained and parallelize
+        mock_model.state_dict.return_value = {}
+        mock_model.config = MagicMock()
+        mock_model.config.pad_token_id = 0
+        mock_runtime_config.model_class.from_pretrained.return_value = mock_model
+        mock_runtime_config.model_config.architectures = ["GPT2LMHeadModel"]
+        mock_distributed_manager.parallelize.return_value = mock_model
+
+        result = setup_model_and_optimizer(
+            config=mock_config,
+            tokenizer=mock_tokenizer,
+            runtime_config=mock_runtime_config,
+            distributed_manager=mock_distributed_manager,
+            checkpoint_manager=mock_checkpoint_manager,
+            init_optimizer=False,  # the behaviour under test
+        )
+
+        assert result.optimizer is None
+        assert result.scheduler is None  # no scheduler is expected without an optimizer
+
+    @patch("nemo_rl.models.automodel.setup.torch.optim.lr_scheduler.LambdaLR")
+    @patch("nemo_rl.models.automodel.setup.torch.distributed.get_rank")
+    @patch("nemo_rl.models.automodel.setup.init_empty_weights")
+    @patch("nemo_rl.models.automodel.setup.get_class")
+    def test_setup_model_with_weights_path(
+        self,
+        mock_get_class,  # innermost @patch: get_class
+        mock_init_empty_weights,
+        mock_get_rank,
+        mock_lambda_lr,  # outermost @patch: LambdaLR
+        mock_config,
+        mock_runtime_config,
+        mock_distributed_manager,
+        mock_checkpoint_manager,
+        mock_tokenizer,
+    ):
+        """Test model setup with checkpoint loading."""
+        mock_get_rank.return_value = 0
+        mock_lambda_lr.return_value = MagicMock()
+
+        mock_model = MagicMock()  # returned by both from_pretrained and parallelize
+        mock_model.state_dict.return_value = {}
+        mock_model.config = MagicMock()
+        mock_model.config.pad_token_id = 0
+        mock_runtime_config.model_class.from_pretrained.return_value = mock_model
+        mock_runtime_config.model_config.architectures = ["GPT2LMHeadModel"]
+        mock_distributed_manager.parallelize.return_value = mock_model
+
+        mock_optimizer = MagicMock()
+        mock_get_class.return_value = MagicMock(return_value=mock_optimizer)
+
+        result = setup_model_and_optimizer(
+            config=mock_config,
+            tokenizer=mock_tokenizer,
+            runtime_config=mock_runtime_config,
+            distributed_manager=mock_distributed_manager,
+            checkpoint_manager=mock_checkpoint_manager,
+            weights_path="/path/to/weights",  # dummy path; the checkpoint manager is mocked
+            optimizer_path="/path/to/optimizer",  # dummy path; the checkpoint manager is mocked
+        )
+
+        mock_checkpoint_manager.load_checkpoint.assert_called_once()
+        call_kwargs = mock_checkpoint_manager.load_checkpoint.call_args[1]  # kwargs of the call
+        assert call_kwargs["weights_path"] == "/path/to/weights"
+        assert call_kwargs["optimizer_path"] == "/path/to/optimizer"
+
+    @patch("nemo_rl.models.automodel.setup.torch.optim.lr_scheduler.LambdaLR")
+    @patch("nemo_rl.models.automodel.setup.torch.distributed.get_rank")
+    @patch("nemo_rl.models.automodel.setup.init_empty_weights")
+    @patch("nemo_rl.models.automodel.setup.get_class")
+    def test_setup_model_no_weights_path_prints_message(
+        self,
+        mock_get_class,  # innermost @patch: get_class
+        mock_init_empty_weights,
+        mock_get_rank,
+        mock_lambda_lr,  # outermost @patch: LambdaLR
+        mock_config,
+        mock_runtime_config,
+        mock_distributed_manager,
+        mock_checkpoint_manager,
+        mock_tokenizer,
+        capsys,  # pytest built-in fixture that captures stdout/stderr
+    ):
+        """Test that no weights path prints info message."""
+        mock_get_rank.return_value = 0
+        mock_lambda_lr.return_value = MagicMock()
+
+        mock_model = MagicMock()  # returned by both from_pretrained and parallelize
+        mock_model.state_dict.return_value = {}
+        mock_model.config = MagicMock()
+        mock_model.config.pad_token_id = 0
+        mock_runtime_config.model_class.from_pretrained.return_value = mock_model
+        mock_runtime_config.model_config.architectures = ["GPT2LMHeadModel"]
+        mock_distributed_manager.parallelize.return_value = mock_model
+
+        mock_optimizer = MagicMock()
+        mock_get_class.return_value = MagicMock(return_value=mock_optimizer)
+
+        setup_model_and_optimizer(
+            config=mock_config,
+            tokenizer=mock_tokenizer,
+            runtime_config=mock_runtime_config,
+            distributed_manager=mock_distributed_manager,
+            checkpoint_manager=mock_checkpoint_manager,
+            weights_path=None,  # the behaviour under test
+        )
+
+        captured = capsys.readouterr()
+        assert "No weights path provided" in captured.out  # substring match on stdout
+
+    @patch("nemo_rl.models.automodel.setup.torch.distributed.get_rank")
+    @patch("nemo_rl.models.automodel.setup.init_empty_weights")
+    @patch("nemo_rl.models.automodel.setup.get_class")
+    def test_setup_model_with_dict_scheduler(
+        self,
+        mock_get_class,
+        mock_init_empty_weights,
+        mock_get_rank,
+        mock_config,
+        mock_runtime_config,
+        mock_distributed_manager,
+        mock_checkpoint_manager,
+        mock_tokenizer,
+    ):
+        """Test model setup with scheduler as dict config."""
+        mock_get_rank.return_value = 0
+
+        mock_model = MagicMock()
+        mock_model.state_dict.return_value = {}
+        mock_model.config = MagicMock()
+        mock_model.config.pad_token_id = 0
+        mock_runtime_config.model_class.from_pretrained.return_value = mock_model
+        mock_runtime_config.model_config.architectures = ["GPT2LMHeadModel"]
+        mock_distributed_manager.parallelize.return_value = mock_model
+
+        mock_optimizer = MagicMock()
+        mock_scheduler = MagicMock()
+
+        def get_class_side_effect(name):
+            if "optim" in name.lower():
+                return MagicMock(return_value=mock_optimizer)
+            return MagicMock(return_value=mock_scheduler)
+
+        mock_get_class.side_effect = get_class_side_effect
+
+        mock_config["scheduler"] = {
+            "name": "torch.optim.lr_scheduler.StepLR",
+            "kwargs": {"step_size": 10},
+        }
+
+        result = setup_model_and_optimizer(
+            config=mock_config,
+            tokenizer=mock_tokenizer,
+            runtime_config=mock_runtime_config,
+            distributed_manager=mock_distributed_manager,
+            checkpoint_manager=mock_checkpoint_manager,
+        )
+
+        assert result.scheduler is not None
+
+    @patch("nemo_rl.models.automodel.setup.torch.optim.lr_scheduler.SequentialLR")
+    @patch("nemo_rl.models.automodel.setup.torch.distributed.get_rank")
+    @patch("nemo_rl.models.automodel.setup.init_empty_weights")
+    @patch("nemo_rl.models.automodel.setup.get_class")
+    def test_setup_model_with_list_scheduler(
+        self,
+        mock_get_class,  # innermost @patch: get_class
+        mock_init_empty_weights,
+        mock_get_rank,
+        mock_sequential_lr,  # outermost @patch: SequentialLR
+        mock_config,
+        mock_runtime_config,
+        mock_distributed_manager,
+        mock_checkpoint_manager,
+        mock_tokenizer,
+    ):
+        """Test model setup with scheduler as list config (SequentialLR)."""
+        mock_get_rank.return_value = 0
+        mock_sequential_lr.return_value = MagicMock()
+
+        mock_model = MagicMock()  # returned by both from_pretrained and parallelize
+        mock_model.state_dict.return_value = {}
+        mock_model.config = MagicMock()
+        mock_model.config.pad_token_id = 0
+        mock_runtime_config.model_class.from_pretrained.return_value = mock_model
+        mock_runtime_config.model_config.architectures = ["GPT2LMHeadModel"]
+        mock_distributed_manager.parallelize.return_value = mock_model
+
+        mock_optimizer = MagicMock()
+        mock_scheduler = MagicMock()
+
+        def get_class_side_effect(name):  # optimizer names -> optimizer mock, others -> scheduler mock
+            if "optim.Adam" in name or "optim.SGD" in name:
+                return MagicMock(return_value=mock_optimizer)
+            return MagicMock(return_value=mock_scheduler)
+
+        mock_get_class.side_effect = get_class_side_effect
+
+        mock_config["scheduler"] = [
+            {
+                "name": "torch.optim.lr_scheduler.LinearLR",
+                "kwargs": {"start_factor": 0.1},
+            },
+            {"name": "torch.optim.lr_scheduler.StepLR", "kwargs": {"step_size": 10}},
+            {"milestones": [5]},  # last entry has no "name", only milestones
+        ]
+
+        result = setup_model_and_optimizer(
+            config=mock_config,
+            tokenizer=mock_tokenizer,
+            runtime_config=mock_runtime_config,
+            distributed_manager=mock_distributed_manager,
+            checkpoint_manager=mock_checkpoint_manager,
+        )
+
+        assert result.scheduler is not None
+        mock_sequential_lr.assert_called_once()  # the list form must go through SequentialLR
+
+    @patch("nemo_rl.models.automodel.setup.torch.optim.lr_scheduler.LambdaLR")
+    @patch("nemo_rl.models.automodel.setup.torch.distributed.get_rank")
+    @patch("nemo_rl.models.automodel.setup.init_empty_weights")
+    @patch("nemo_rl.models.automodel.setup.get_class")
+    def test_setup_model_sets_pad_token_id(
+        self,
+        mock_get_class,  # innermost @patch: get_class
+        mock_init_empty_weights,
+        mock_get_rank,
+        mock_lambda_lr,  # outermost @patch: LambdaLR
+        mock_config,
+        mock_runtime_config,
+        mock_distributed_manager,
+        mock_checkpoint_manager,
+        mock_tokenizer,
+    ):
+        """Test that pad_token_id is set from tokenizer when None."""
+        mock_get_rank.return_value = 0
+        mock_lambda_lr.return_value = MagicMock()
+
+        mock_model = MagicMock()  # returned by both from_pretrained and parallelize
+        mock_model.state_dict.return_value = {}
+        mock_model.config = MagicMock()
+        mock_model.config.pad_token_id = None  # unset on the model, so the tokenizer's id should be used
+        mock_runtime_config.model_class.from_pretrained.return_value = mock_model
+        mock_runtime_config.model_config.architectures = ["GPT2LMHeadModel"]
+        mock_distributed_manager.parallelize.return_value = mock_model
+        mock_tokenizer.pad_token_id = 42  # overrides the fixture's 0 with a recognisable value
+
+        mock_optimizer = MagicMock()
+        mock_get_class.return_value = MagicMock(return_value=mock_optimizer)
+
+        setup_model_and_optimizer(
+            config=mock_config,
+            tokenizer=mock_tokenizer,
+            runtime_config=mock_runtime_config,
+            distributed_manager=mock_distributed_manager,
+            checkpoint_manager=mock_checkpoint_manager,
+        )
+
+        assert mock_model.config.pad_token_id == 42  # copied over from the tokenizer
+
+    @patch("nemo_rl.models.automodel.setup.torch.optim.lr_scheduler.LambdaLR")
+    @patch("nemo_rl.models.automodel.setup.torch.distributed.get_rank")
+    @patch("nemo_rl.models.automodel.setup.init_empty_weights")
+    @patch("nemo_rl.models.automodel.setup.get_class")
+    def test_setup_model_with_moe_model(
+        self,
+        mock_get_class,  # innermost @patch: get_class
+        mock_init_empty_weights,
+        mock_get_rank,
+        mock_lambda_lr,  # outermost @patch: LambdaLR
+        mock_config,
+        mock_runtime_config,
+        mock_distributed_manager,
+        mock_checkpoint_manager,
+        mock_tokenizer,
+    ):
+        """Test model setup detects MoE model correctly."""
+        mock_get_rank.return_value = 0
+        mock_lambda_lr.return_value = MagicMock()
+
+        mock_model = MagicMock()  # returned by both from_pretrained and parallelize
+        # One key contains "expert", which is what should trigger MoE detection
+        mock_model.state_dict.return_value = {
+            "layer.expert.weight": torch.zeros(10),
+            "layer.weight": torch.zeros(10),
+        }
+        mock_model.config = MagicMock()
+        mock_model.config.pad_token_id = 0
+        mock_runtime_config.model_class.from_pretrained.return_value = mock_model
+        mock_runtime_config.model_config.architectures = ["GPT2LMHeadModel"]
+        mock_distributed_manager.parallelize.return_value = mock_model
+
+        mock_optimizer = MagicMock()
+        mock_get_class.return_value = MagicMock(return_value=mock_optimizer)
+
+        result = setup_model_and_optimizer(
+            config=mock_config,
+            tokenizer=mock_tokenizer,
+            runtime_config=mock_runtime_config,
+            distributed_manager=mock_distributed_manager,
+            checkpoint_manager=mock_checkpoint_manager,
+        )
+
+        assert result.is_moe_model is True
+
+    @patch("nemo_rl.models.automodel.setup.torch.distributed.get_rank")
+    @patch("nemo_rl.models.automodel.setup.init_empty_weights")
+    @patch("nemo_rl.models.automodel.setup.get_class")
+    def test_setup_model_with_cp_raises_for_vlm(
+        self,
+        mock_get_class,  # patched but left unconfigured in this test
+        mock_init_empty_weights,
+        mock_get_rank,
+        mock_config,
+        mock_runtime_config,
+        mock_distributed_manager,
+        mock_checkpoint_manager,
+        mock_tokenizer,
+    ):
+        """Test that context parallel with VLM raises AssertionError."""
+        mock_get_rank.return_value = 0
+        mock_distributed_manager.cp_size = 2  # overrides the fixture's cp_size of 1
+
+        mock_model = MagicMock()
+        mock_model.state_dict.return_value = {}
+        mock_model.config = MagicMock()
+        mock_model.config.pad_token_id = 0
+        mock_runtime_config.model_class.from_pretrained.return_value = mock_model
+        mock_runtime_config.model_config.architectures = ["GPT2LMHeadModel"]
+
+        with pytest.raises(  # match= is a regex searched in the assertion message
+            AssertionError, match="Context parallel is yet not supported for VLM models"
+        ):
+            setup_model_and_optimizer(
+                config=mock_config,
+                tokenizer=mock_tokenizer,
+                runtime_config=mock_runtime_config,
+                distributed_manager=mock_distributed_manager,
+                checkpoint_manager=mock_checkpoint_manager,
+                is_vlm=True,  # the VLM flag combined with cp_size > 1 is the case under test
+            )
+
+    @patch("nemo_rl.models.automodel.setup.torch.distributed.get_rank")
+    @patch("nemo_rl.models.automodel.setup.init_empty_weights")
+    @patch("nemo_rl.models.automodel.setup.get_class")
+    def test_setup_model_with_cp_and_sp_raises_error(
+        self,
+        mock_get_class,
+        mock_init_empty_weights,
+        mock_get_rank,
+        mock_config,
+        mock_runtime_config,
+        mock_distributed_manager,
+        mock_checkpoint_manager,
+        mock_tokenizer,
+    ):
+        """CP combined with sequence parallel (under TP=2) must raise AssertionError."""
+        mock_get_rank.return_value = 0
+        mock_distributed_manager.cp_size = 2  # CP enabled
+        mock_distributed_manager.tp_size = 2  # TP enabled
+        mock_distributed_manager.sequence_parallel = True  # SP enabled
+
+        mock_model = MagicMock()
+        mock_model.state_dict.return_value = {}
+        mock_model.config = MagicMock()
+        mock_model.config.pad_token_id = 0
+        mock_runtime_config.model_class.from_pretrained.return_value = mock_model
+        mock_runtime_config.model_config.architectures = ["GPT2LMHeadModel"]
+
+        with pytest.raises(
+            AssertionError,
+            match="context parallel can't be used together with sequence parallel",
+        ):
+            setup_model_and_optimizer(
+                config=mock_config,
+                tokenizer=mock_tokenizer,
+                runtime_config=mock_runtime_config,
+                distributed_manager=mock_distributed_manager,
+                checkpoint_manager=mock_checkpoint_manager,
+            )
+
+    @patch("nemo_rl.models.automodel.setup.torch.distributed.get_rank")
+    @patch("nemo_rl.models.automodel.setup.init_empty_weights")
+    @patch("nemo_rl.models.automodel.setup.get_class")
+    def test_setup_model_with_cp_raises_for_gemma3(
+        self,
+        mock_get_class,
+        mock_init_empty_weights,
+        mock_get_rank,
+        mock_config,
+        mock_runtime_config,
+        mock_distributed_manager,
+        mock_checkpoint_manager,
+        mock_tokenizer,
+    ):
+        """cp_size=2 with a Gemma3ForCausalLM model must raise AssertionError."""
+        from transformers.models.gemma3.modeling_gemma3 import Gemma3ForCausalLM
+
+        mock_get_rank.return_value = 0
+        mock_distributed_manager.cp_size = 2  # CP enabled
+        mock_distributed_manager.tp_size = 1
+        mock_distributed_manager.sequence_parallel = False
+
+        # spec= makes isinstance(mock_model, Gemma3ForCausalLM) evaluate to True
+        mock_model = MagicMock(spec=Gemma3ForCausalLM)
+        mock_model.state_dict.return_value = {}
+        mock_model.config = MagicMock()
+        mock_model.config.pad_token_id = 0
+        mock_runtime_config.model_class.from_pretrained.return_value = mock_model
+        mock_runtime_config.model_config.architectures = ["Gemma3ForCausalLM"]
+
+        with pytest.raises(
+            AssertionError,
+            match="Context parallel is not supported for Gemma3ForCausalLM",
+        ):
+            setup_model_and_optimizer(
+                config=mock_config,
+                tokenizer=mock_tokenizer,
+                runtime_config=mock_runtime_config,
+                distributed_manager=mock_distributed_manager,
+                checkpoint_manager=mock_checkpoint_manager,
+            )
+
+    @patch("nemo_rl.models.automodel.setup.torch.optim.lr_scheduler.LambdaLR")
+    @patch("nemo_rl.models.automodel.setup.torch.distributed.get_rank")
+    @patch("nemo_rl.models.automodel.setup.init_empty_weights")
+    @patch("nemo_rl.models.automodel.setup.get_class")
+    @patch("nemo_rl.models.automodel.setup._resolve_target")
+    def test_setup_model_with_backend_automodel_kwargs(
+        self,
+        mock_resolve_target,
+        mock_get_class,
+        mock_init_empty_weights,
+        mock_get_rank,
+        mock_lambda_lr,
+        mock_config,
+        mock_runtime_config,
+        mock_distributed_manager,
+        mock_checkpoint_manager,
+        mock_tokenizer,
+    ):
+        """automodel_kwargs["backend"] is resolved via _target_ and instantiated."""
+        mock_get_rank.return_value = 0
+        mock_lambda_lr.return_value = MagicMock()
+
+        mock_backend_class = MagicMock()
+        mock_resolve_target.return_value = mock_backend_class
+
+        mock_model = MagicMock()
+        mock_model.state_dict.return_value = {}
+        mock_model.config = MagicMock()
+        mock_model.config.pad_token_id = 0
+        mock_runtime_config.model_class.from_pretrained.return_value = mock_model
+        mock_runtime_config.model_config.architectures = ["GPT2LMHeadModel"]
+        mock_distributed_manager.parallelize.return_value = mock_model
+
+        mock_optimizer = MagicMock()
+        mock_get_class.return_value = MagicMock(return_value=mock_optimizer)
+
+        mock_config["dtensor_cfg"]["automodel_kwargs"] = {
+            "backend": {
+                "_target_": "some.backend.Class",
+                "param1": "value1",
+            }
+        }
+
+        setup_model_and_optimizer(
+            config=mock_config,
+            tokenizer=mock_tokenizer,
+            runtime_config=mock_runtime_config,
+            distributed_manager=mock_distributed_manager,
+            checkpoint_manager=mock_checkpoint_manager,
+        )
+
+        mock_resolve_target.assert_called_once_with("some.backend.Class")
+        mock_backend_class.assert_called_once_with(param1="value1")  # _target_ excluded
+
+    @patch("nemo_rl.models.automodel.setup.torch.optim.lr_scheduler.LambdaLR")
+    @patch("nemo_rl.models.automodel.setup.torch.distributed.get_rank")
+    @patch("nemo_rl.models.automodel.setup.init_empty_weights")
+    @patch("nemo_rl.models.automodel.setup.get_class")
+    @patch("nemo_rl.models.automodel.setup.PeftConfig")
+    @patch("nemo_rl.models.automodel.setup.apply_lora_to_linear_modules")
+    def test_setup_model_with_lora(
+        self,
+        mock_apply_lora,
+        mock_peft_config,
+        mock_get_class,
+        mock_init_empty_weights,
+        mock_get_rank,
+        mock_lambda_lr,
+        mock_config,
+        mock_runtime_config,
+        mock_distributed_manager,
+        mock_checkpoint_manager,
+        mock_tokenizer,
+    ):
+        """Enabled lora_cfg builds a PeftConfig, applies LoRA once, and returns it."""
+        mock_get_rank.return_value = 0
+        mock_lambda_lr.return_value = MagicMock()
+
+        mock_peft_config_instance = MagicMock()
+        mock_peft_config_instance.lora_A_init = "kaiming"
+        mock_peft_config.from_dict.return_value = mock_peft_config_instance
+
+        mock_model = MagicMock()
+        mock_model.state_dict.return_value = {}
+        mock_model.config = MagicMock()
+        mock_model.config.pad_token_id = 0
+        mock_runtime_config.model_class.from_pretrained.return_value = mock_model
+        mock_runtime_config.model_config.architectures = ["GPT2LMHeadModel"]
+        mock_distributed_manager.parallelize.return_value = mock_model
+
+        mock_optimizer = MagicMock()
+        mock_get_class.return_value = MagicMock(return_value=mock_optimizer)
+
+        mock_config["dtensor_cfg"]["lora_cfg"] = {
+            "enabled": True,
+            "use_triton": False,
+            "rank": 8,
+        }
+
+        result = setup_model_and_optimizer(
+            config=mock_config,
+            tokenizer=mock_tokenizer,
+            runtime_config=mock_runtime_config,
+            distributed_manager=mock_distributed_manager,
+            checkpoint_manager=mock_checkpoint_manager,
+        )
+
+        mock_peft_config.from_dict.assert_called_once()
+        mock_apply_lora.assert_called_once()
+        assert result.peft_config == mock_peft_config_instance
+
+    @patch("nemo_rl.models.automodel.setup.torch.optim.lr_scheduler.LambdaLR")
+    @patch("nemo_rl.models.automodel.setup.torch.distributed.get_rank")
+    @patch("nemo_rl.models.automodel.setup.init_empty_weights")
+    @patch("nemo_rl.models.automodel.setup.get_class")
+    @patch("nemo_rl.models.automodel.setup.cuda", create=True)
+    def test_setup_model_with_activation_checkpointing(
+        self,
+        mock_cuda,  # from the setup.cuda patch (create=True); unused below
+        mock_get_class,
+        mock_init_empty_weights,
+        mock_get_rank,
+        mock_lambda_lr,
+        mock_config,
+        mock_runtime_config,
+        mock_distributed_manager,
+        mock_checkpoint_manager,
+        mock_tokenizer,
+    ):
+        """Activation checkpointing must call enable_cudnn_sdp(False)."""
+        mock_get_rank.return_value = 0
+        mock_lambda_lr.return_value = MagicMock()
+
+        mock_model = MagicMock()
+        mock_model.state_dict.return_value = {}
+        mock_model.config = MagicMock()
+        mock_model.config.pad_token_id = 0
+        mock_runtime_config.model_class.from_pretrained.return_value = mock_model
+        mock_runtime_config.model_config.architectures = ["GPT2LMHeadModel"]
+        mock_distributed_manager.parallelize.return_value = mock_model
+
+        mock_optimizer = MagicMock()
+        mock_get_class.return_value = MagicMock(return_value=mock_optimizer)
+
+        mock_config["dtensor_cfg"]["activation_checkpointing"] = True
+
+        with patch(
+            "nemo_rl.models.automodel.setup.torch.backends.cuda"
+        ) as mock_torch_cuda:
+            setup_model_and_optimizer(
+                config=mock_config,
+                tokenizer=mock_tokenizer,
+                runtime_config=mock_runtime_config,
+                distributed_manager=mock_distributed_manager,
+                checkpoint_manager=mock_checkpoint_manager,
+            )
+
+            mock_torch_cuda.enable_cudnn_sdp.assert_called_with(False)
+
+    @patch("nemo_rl.models.automodel.setup.torch.optim.lr_scheduler.LambdaLR")
+    @patch("nemo_rl.models.automodel.setup.torch.distributed.get_rank")
+    @patch("nemo_rl.models.automodel.setup.init_empty_weights")
+    @patch("nemo_rl.models.automodel.setup.get_class")
+    def test_setup_model_with_tied_word_embeddings(
+        self,
+        mock_get_class,
+        mock_init_empty_weights,
+        mock_get_rank,
+        mock_lambda_lr,
+        mock_config,
+        mock_runtime_config,
+        mock_distributed_manager,
+        mock_checkpoint_manager,
+        mock_tokenizer,
+    ):
+        """Tied embeddings: lm_head.weight must become the embed_tokens parameter."""
+        mock_get_rank.return_value = 0
+        mock_lambda_lr.return_value = MagicMock()
+
+        mock_embed_weight = torch.nn.Parameter(torch.zeros(100, 768))
+        mock_model = MagicMock()
+        mock_model.state_dict.return_value = {}
+        mock_model.config = MagicMock()
+        mock_model.config.pad_token_id = 0
+        mock_model.config.tie_word_embeddings = True
+        mock_model.lm_head = MagicMock()
+
+        # named_parameters yields embed_tokens first, then a distinct lm_head weight
+        mock_model.named_parameters.return_value = [
+            ("model.embed_tokens.weight", mock_embed_weight),
+            ("lm_head.weight", torch.nn.Parameter(torch.zeros(100, 768))),
+        ]
+
+        mock_runtime_config.model_class.from_pretrained.return_value = mock_model
+        mock_runtime_config.model_config.architectures = ["GPT2LMHeadModel"]
+        mock_distributed_manager.parallelize.return_value = mock_model
+
+        mock_optimizer = MagicMock()
+        mock_get_class.return_value = MagicMock(return_value=mock_optimizer)
+
+        setup_model_and_optimizer(
+            config=mock_config,
+            tokenizer=mock_tokenizer,
+            runtime_config=mock_runtime_config,
+            distributed_manager=mock_distributed_manager,
+            checkpoint_manager=mock_checkpoint_manager,
+        )
+
+        # Identity (not equality) check: lm_head must share the very same Parameter
+        assert mock_model.lm_head.weight is mock_embed_weight
+
+    @patch("nemo_rl.models.automodel.setup.torch.optim.lr_scheduler.LambdaLR")
+    @patch("nemo_rl.models.automodel.setup.torch.distributed.get_rank")
+    @patch("nemo_rl.models.automodel.setup.init_empty_weights")
+    @patch("nemo_rl.models.automodel.setup.get_class")
+    def test_setup_model_with_cpu_offload(
+        self,
+        mock_get_class,
+        mock_init_empty_weights,
+        mock_get_rank,
+        mock_lambda_lr,
+        mock_config,
+        mock_autoconfig,
+        mock_distributed_manager,
+        mock_checkpoint_manager,
+        mock_tokenizer,
+    ):
+        """cpu_offload=True must move the model's buffers and the model to CPU."""
+        mock_get_rank.return_value = 0
+        mock_lambda_lr.return_value = MagicMock()
+
+        runtime_config = RuntimeConfig(
+            model_class=MagicMock(),
+            model_config=mock_autoconfig,
+            hf_config_overrides={},
+            allow_flash_attn_args=True,
+            attn_impl=None,
+            dtype=torch.bfloat16,
+            enable_seq_packing=False,
+            max_grad_norm=1.0,
+            cpu_offload=True,  # CPU offload enabled
+            offload_optimizer_for_logprob=False,
+            is_generation_colocated=None,
+            is_reward_model=False,
+        )
+
+        mock_buffer = MagicMock()
+        mock_buffer.data = MagicMock()
+        mock_buffer.data.to.return_value = mock_buffer.data
+
+        mock_model = MagicMock()
+        mock_model.state_dict.return_value = {}
+        mock_model.config = MagicMock()
+        mock_model.config.pad_token_id = 0
+        mock_model.buffers.return_value = [mock_buffer]
+        runtime_config.model_class.from_pretrained.return_value = mock_model
+        runtime_config.model_config.architectures = ["GPT2LMHeadModel"]
+        mock_distributed_manager.parallelize.return_value = mock_model
+
+        mock_optimizer = MagicMock()
+        mock_get_class.return_value = MagicMock(return_value=mock_optimizer)
+
+        setup_model_and_optimizer(
+            config=mock_config,
+            tokenizer=mock_tokenizer,
+            runtime_config=runtime_config,
+            distributed_manager=mock_distributed_manager,
+            checkpoint_manager=mock_checkpoint_manager,
+        )
+
+        # Verify buffers were moved to CPU
+        mock_buffer.data.to.assert_called_with("cpu")
+        # Verify model was moved to CPU
+        mock_model.to.assert_called_with("cpu")
diff --git a/tests/unit/models/generation/test_sglang_generation.py b/tests/unit/models/generation/test_sglang_generation.py
new file mode 100644
index 0000000000..299bd8e3d6
--- /dev/null
+++ b/tests/unit/models/generation/test_sglang_generation.py
@@ -0,0 +1,927 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Unit tests for SGLang generation backend.
+
+These tests verify that the SGLang generation backend produces sane outputs.
+While not true unit tests, they validate the generation quality in unit test runs.
+"""
+
+import gc
+from copy import deepcopy
+
+import pytest
+import ray
+import torch
+
+from nemo_rl.algorithms.utils import get_tokenizer
+from nemo_rl.distributed.batched_data_dict import BatchedDataDict
+from nemo_rl.distributed.virtual_cluster import RayVirtualCluster
+from nemo_rl.models.generation.sglang import SGLangConfig, SGLangGeneration
+
+model_name = "Qwen/Qwen3-0.6B"
+
+# Base SGLang test config; deep-copy it before mutating so tests stay isolated
+basic_sglang_test_config: SGLangConfig = {
+    "backend": "sglang",
+    "model_name": model_name,
+    "model_path": model_name,
+    "tokenizer": {
+        "name": model_name,
+    },
+    "dtype": "bfloat16",
+    "max_new_tokens": 5,  # Small number of tokens for testing
+    "temperature": 1.0,
+    "top_p": 1.0,
+    "top_k": None,
+    "stop_token_ids": None,  # None is replaced with [eos] by configure_sglang_config
+    "stop_strings": None,
+    "sglang_cfg": {
+        "model_path": model_name,
+        "gpus_per_server": 2,  # tests override this to 1 when they need 2 servers
+        "dtype": "bfloat16",
+        "context_length": 1024,
+        "log_level": "warning",
+        "skip_server_warmup": True,
+        "enable_memory_saver": False,
+        "dp_size": 1,
+        "pp_size": 1,
+        "ep_size": 1,
+        "mem_fraction_static": 0.7,
+    },
+    "colocated": {
+        "enabled": True,
+        "resources": {
+            "gpus_per_node": None,
+            "num_nodes": None,
+        },
+    },
+    "sglang_kwargs": {},
+}
+
+# Base DTensor Policy config; "generation" holds its own deep copy of the SGLang config
+basic_dtensor_test_config = {
+    "model_name": model_name,
+    "tokenizer": {
+        "name": model_name,
+    },
+    "train_global_batch_size": 1,
+    "train_micro_batch_size": 1,
+    "learning_rate": 5e-6,
+    "logprob_batch_size": 1,
+    "max_new_tokens": 16,
+    "do_sample": False,
+    "precision": "float32",
+    "offload_optimizer_for_logprob": False,
+    "optimizer": {
+        "name": "torch.optim.AdamW",
+        "kwargs": {
+            "lr": 5e-6,
+            "weight_decay": 0.01,
+            "betas": [0.9, 0.999],
+            "eps": 1e-8,
+        },
+    },
+    "dtensor_cfg": {
+        "_v2": True,  # Use DTensorPolicyWorkerV2 for stream_weights_via_http
+        "enabled": True,
+        "cpu_offload": False,
+        "sequence_parallel": False,
+        "activation_checkpointing": False,
+        "tensor_parallel_size": 2,  # tests lower this to 1 to match gpus_per_server=1
+        "context_parallel_size": 1,
+        "custom_parallel_plan": None,
+    },
+    "dynamic_batching": {
+        "enabled": True,
+        "train_mb_tokens": 40,
+        "logprob_mb_tokens": 40,
+        "sequence_length_round": 4,
+    },
+    "sequence_packing": {
+        "enabled": False,
+    },
+    "max_grad_norm": 1.0,
+    "make_sequence_length_divisible_by": 1,
+    "generation": deepcopy(basic_sglang_test_config),
+}
+
+
+def configure_sglang_config(
+    config: SGLangConfig, tokenizer, is_eval=True
+) -> SGLangConfig:
+    """Return a copy of config with pad/stop token ids taken from the tokenizer."""
+    configured = deepcopy(config)
+    configured["_pad_token_id"] = tokenizer.pad_token_id
+    if configured["stop_token_ids"] is None:
+        configured["stop_token_ids"] = [tokenizer.eos_token_id]
+    return configured
+
+
+@pytest.fixture(scope="function")
+def cluster():
+    """Yield a 2-GPU RayVirtualCluster and shut it down once the test is done."""
+    two_gpu_cluster = RayVirtualCluster(
+        name="sglang-test-cluster",
+        bundle_ct_per_node_list=[2],
+        num_gpus_per_node=2,
+        use_gpus=True,
+        max_colocated_worker_groups=2,
+    )
+    yield two_gpu_cluster
+    two_gpu_cluster.shutdown()
+
+
+@pytest.fixture(scope="function")
+def tokenizer():
+    """Load the tokenizer named in the basic SGLang test config."""
+    tokenizer_cfg = basic_sglang_test_config["tokenizer"]
+    return get_tokenizer(tokenizer_cfg)
+
+
+@pytest.fixture(scope="function")
+def policy(cluster, tokenizer):
+    """Yield an SGLangGeneration; shutdown errors are printed, caches always freed."""
+    sglang_config = configure_sglang_config(basic_sglang_test_config, tokenizer)
+    p = SGLangGeneration(cluster, sglang_config)
+    yield p
+    try:
+        p.shutdown()
+    except Exception as e:  # best-effort teardown
+        print(f"Error during policy cleanup: {e}")
+    finally:
+        gc.collect()
+        torch.cuda.empty_cache()
+
+
+@pytest.fixture(scope="function")
+def test_input_data(tokenizer):
+    """Build a two-prompt batch, right-padded to 20 tokens, for generation tests."""
+    prompts = [
+        "Hello, my name is",
+        "The capital of France is",
+    ]
+
+    # Right-pad (or truncate) every prompt to exactly 20 token positions
+    tokenized = tokenizer(
+        prompts,
+        return_tensors="pt",
+        padding="max_length",
+        padding_side="right",
+        max_length=20,
+        truncation=True,
+    )
+    attention_mask = tokenized["attention_mask"]
+
+    # Summing the attention mask along dim=1 gives one length per prompt
+    batch = BatchedDataDict(
+        {
+            "input_ids": tokenized["input_ids"],
+            "input_lengths": attention_mask.sum(dim=1).to(torch.int32),
+        }
+    )
+
+    return batch
+
+
+@pytest.fixture(scope="function")
+def policy_cluster_separate():
+    """Yield a 2-GPU cluster for the Policy; shutdown errors are only printed."""
+    policy_cluster = RayVirtualCluster(
+        name="sglang-test-policy-cluster-separate",
+        bundle_ct_per_node_list=[2],
+        num_gpus_per_node=2,
+        use_gpus=True,
+        max_colocated_worker_groups=2,
+    )
+    yield policy_cluster
+    try:
+        policy_cluster.shutdown()
+    except Exception as e:
+        print(f"Error during policy_cluster_separate shutdown: {e}")
+
+
+def get_generation_cluster_separate(num_gpus_per_node: int = 2) -> RayVirtualCluster:
+    """Build the separate cluster for SGLangGeneration; the caller must shut it down."""
+    return RayVirtualCluster(
+        name="sglang-test-generation-cluster-separate",
+        bundle_ct_per_node_list=[num_gpus_per_node],
+        num_gpus_per_node=num_gpus_per_node,
+        use_gpus=True,
+        max_colocated_worker_groups=1,
+    )
+
+
+# =============================================================================
+# Basic Configuration Tests
+# =============================================================================
+
+
+@pytest.mark.sglang
+@pytest.mark.timeout(120)
+def test_sglang_missing_required_config_key(cluster, tokenizer):
+    """Constructing SGLangGeneration without ``sglang_cfg`` must raise."""
+    expected_errors = (KeyError, ValueError, AssertionError, TypeError)
+    # configure_sglang_config returns a deep copy, so the shared base stays intact
+    broken_config = configure_sglang_config(basic_sglang_test_config, tokenizer)
+    broken_config.pop("sglang_cfg")
+
+    with pytest.raises(expected_errors):
+        SGLangGeneration(cluster, broken_config)
+
+
+@pytest.mark.sglang
+def test_sglang_top_p_top_k_validation(cluster, tokenizer):
+    """Check that SGLangGeneration initializes with top_p=0.95 and top_k=50.
+
+    Only construction is exercised; nothing is generated. SGLang's accepted
+    ranges may differ from vLLM's, so just these typical values are checked.
+    """
+    # Override the sampling params on a private copy of the base config
+    sampling_config = configure_sglang_config(basic_sglang_test_config, tokenizer)
+    sampling_config["top_p"] = 0.95
+    sampling_config["top_k"] = 50
+
+    generation = None
+    try:
+        generation = SGLangGeneration(cluster, sampling_config)
+        print("Successfully initialized with top_p=0.95 and top_k=50")
+    except Exception as e:
+        pytest.fail(f"Should not raise error with reasonable sampling params: {e}")
+    finally:
+        # Tear down only if construction succeeded
+        if generation:
+            generation.shutdown()
+            gc.collect()
+            torch.cuda.empty_cache()
+
+
+# =============================================================================
+# Basic Generation Tests
+# =============================================================================
+
+
+@pytest.mark.sglang
+@pytest.mark.timeout(180)
+def test_sglang_policy_generation(policy, test_input_data, tokenizer):
+    """Generate for the fixture batch and sanity-check the result.
+
+    Checks the returned keys, the batch dimension, and that decoded text is non-empty.
+    """
+    print("Testing SGLang generation...")
+    outputs = policy.generate(test_input_data)
+
+    # The result must expose all four expected fields
+    required_keys = (
+        "output_ids",
+        "logprobs",
+        "generation_lengths",
+        "unpadded_sequence_lengths",
+    )
+    for key in required_keys:
+        assert key in outputs, f"{key} not found in generation output"
+
+    # Leading dimension of each tensor must equal the number of prompts
+    batch_size = len(test_input_data["input_ids"])
+    assert outputs["output_ids"].shape[0] == batch_size, "Wrong batch size in output"
+    assert outputs["generation_lengths"].shape[0] == batch_size, (
+        "Wrong batch size in generation_lengths"
+    )
+
+    # Decode and check outputs
+    generated_texts = tokenizer.batch_decode(
+        outputs["output_ids"], skip_special_tokens=True
+    )
+
+    print(f"Generated texts: {generated_texts}")
+
+    # No decoded sequence may be the empty string
+    empty_texts = [text for text in generated_texts if len(text) == 0]
+    assert not empty_texts, "Some generated texts are empty"
+
+
+@pytest.mark.sglang
+def test_sglang_worker_seed_behavior(cluster, tokenizer):
+    """
+    Test that different workers generate different outputs for identical prompts due to different seeds.
+    This ensures proper randomization across distributed workers for diverse exploration in RLHF.
+
+    Key: Use gpus_per_server=1 to create 2 independent SGLang servers (each with its own seed),
+    rather than 1 server with TP=2.
+    """
+    from nemo_rl.algorithms.grpo import refit_policy_generation
+    from nemo_rl.models.policy.lm_policy import Policy
+
+    unique_prompts = [
+        "Hello, my name is",
+        "The capital of France is",
+    ]
+
+    # Create a batch where each prompt appears twice
+    # When sharded, different workers will get the same prompt
+    duplicated_prompts = unique_prompts + unique_prompts
+
+    # Tokenize prompts
+    encodings = tokenizer(
+        duplicated_prompts,
+        padding="max_length",
+        max_length=20,
+        truncation=True,
+        return_tensors="pt",
+        padding_side="right",
+    )
+
+    input_lengths = encodings["attention_mask"].sum(dim=1).to(torch.int32)
+
+    # Create input data dictionary
+    duplicated_batch = BatchedDataDict(
+        {
+            "input_ids": encodings["input_ids"],
+            "input_lengths": input_lengths,
+        }
+    )
+
+    # Test with gpus_per_server=1 to create 2 independent servers with different seeds
+    print("Creating SGLang policy with gpus_per_server=1 (2 independent servers)...")
+    sglang_config = deepcopy(basic_sglang_test_config)
+    sglang_config["sglang_cfg"]["gpus_per_server"] = 1
+    sglang_config = configure_sglang_config(sglang_config, tokenizer)
+
+    dtensor_config = deepcopy(basic_dtensor_test_config)
+    dtensor_config["dtensor_cfg"]["tensor_parallel_size"] = 1  # Match gpus_per_server
+
+    # Bind both handles before the try so cleanup still runs if setup or refit fails
+    policy = None
+    lm_policy = None
+    try:
+        policy = SGLangGeneration(cluster, sglang_config)
+        policy.finish_generation()
+        lm_policy = Policy(cluster, dtensor_config, tokenizer)
+
+        state_dict_info = lm_policy.prepare_refit_info()
+        policy.prepare_refit_info(state_dict_info)
+
+        print("Refitting SGLang policy...")
+        refit_policy_generation(
+            lm_policy, policy, sglang_config["colocated"]["enabled"]
+        )
+
+        # Generate with duplicated prompts
+        print("Running generation with duplicated prompts...")
+        outputs = policy.generate(duplicated_batch, greedy=False)
+
+        # Decode the generated sequences
+        gen_texts = tokenizer.batch_decode(
+            outputs["output_ids"], skip_special_tokens=True
+        )
+
+        print(f"Generated texts with duplicated prompts: {gen_texts}")
+
+        # Check if the duplicated prompts generated different texts
+        # The first half and second half should be different due to different worker seeds
+        first_half = gen_texts[: len(unique_prompts)]
+        second_half = gen_texts[len(unique_prompts) :]
+
+        print(f"First worker outputs: {first_half}")
+        print(f"Second worker outputs: {second_half}")
+
+        # At least one of the pairs should be different due to different seeds
+        assert first_half != second_half, (
+            "Different workers should generate different outputs for identical prompts due to different seeds"
+        )
+    finally:
+        if policy is not None:
+            policy.shutdown()
+        if lm_policy is not None:
+            lm_policy.shutdown()
+        gc.collect()
+        torch.cuda.empty_cache()
+
+
+@pytest.mark.sglang
+def test_sglang_policy_tensor_parallel(cluster, tokenizer):
+    """Generate on a gpus_per_server=2 (TP=2) server before and after a reset cycle."""
+    # One server spanning both GPUs, i.e. tensor parallel size 2
+    tp_config = configure_sglang_config(basic_sglang_test_config, tokenizer)
+    tp_config["sglang_cfg"]["gpus_per_server"] = 2  # TP=2
+
+    prompts = ["Hello, my name is", "The capital of France is"]
+    engine = None
+    try:
+        engine = SGLangGeneration(cluster, tp_config)
+
+        # Right-pad both prompts to 10 token positions
+        tokenized = tokenizer(
+            prompts,
+            return_tensors="pt",
+            padding="max_length",
+            padding_side="right",
+            max_length=10,
+            truncation=True,
+        )
+        lengths = tokenized["attention_mask"].sum(dim=1).to(torch.int32)
+
+        batch = BatchedDataDict(
+            {
+                "input_ids": tokenized["input_ids"],
+                "input_lengths": lengths,
+            }
+        )
+
+        # First pass: only needs to complete; its result is discarded
+        engine.generate(batch)
+
+        # Cycle the engine through finish/prepare before generating again
+        engine.finish_generation()
+        engine.prepare_for_generation()
+
+        # Second pass: this is the output that gets validated
+        outputs = engine.generate(batch)
+
+        assert "output_ids" in outputs, "output_ids not found in generation output"
+        assert outputs["output_ids"].shape[0] == 2, "Wrong batch size in output"
+
+        # Decode the first sequence and require non-empty text
+        first_ids = outputs["output_ids"][0]
+        generated_text = tokenizer.decode(first_ids, skip_special_tokens=True)
+        print(f"Generated text with TP=2: {generated_text}")
+        assert len(generated_text) > 0, "Generated text is empty"
+
+    finally:
+        # Shut down only an engine that was actually constructed
+        if engine:
+            engine.shutdown()
+        gc.collect()
+        torch.cuda.empty_cache()
+
+
+@pytest.mark.sglang
+def test_sglang_generate_text(cluster, tokenizer):
+    """Test that greedy SGLang generation decodes to text longer than each prompt.
+
+    Note: SGLang doesn't have a generate_text method like vLLM,
+    so we use generate + tokenizer decode to verify text generation.
+    """
+    # Prepare test data
+    test_prompts = [
+        "Hello, my name is",
+        "The capital of France is",
+    ]
+
+    encodings = tokenizer(
+        test_prompts,
+        padding="max_length",
+        max_length=10,
+        truncation=True,
+        return_tensors="pt",
+        padding_side="right",
+    )
+
+    test_input_data = BatchedDataDict(
+        {
+            "input_ids": encodings["input_ids"],
+            "input_lengths": encodings["attention_mask"].sum(dim=1).to(torch.int32),
+        }
+    )
+
+    # Create SGLang config with gpus_per_server=2 (using tensor parallelism)
+    sglang_config = deepcopy(basic_sglang_test_config)
+    sglang_config["sglang_cfg"]["gpus_per_server"] = 2
+    sglang_config = configure_sglang_config(sglang_config, tokenizer, is_eval=True)
+
+    # Guard against the module-level model_name being changed to another model
+    assert sglang_config["model_name"] == "Qwen/Qwen3-0.6B", (
+        "Model name should be Qwen/Qwen3-0.6B to get expected output"
+    )
+
+    sglang_generation = None
+    try:
+        # Create SGLang generation
+        sglang_generation = SGLangGeneration(cluster, sglang_config)
+
+        # Generate with greedy decoding for deterministic output
+        output = sglang_generation.generate(test_input_data, greedy=True)
+
+        # Decode generated text
+        generated_texts = tokenizer.batch_decode(
+            output["output_ids"], skip_special_tokens=True
+        )
+
+        print(f"Generated texts: {generated_texts}")
+
+        # Each decoded text must be strictly longer than its prompt string
+        for i, text in enumerate(generated_texts):
+            assert len(text) > len(test_prompts[i]), (
+                f"Generated text should be longer than input prompt: {text}"
+            )
+            # Log the pair for manual inspection; prompt containment is not asserted
+            print(f"Prompt: {test_prompts[i]} -> Generated: {text}")
+
+    finally:
+        # Clean up
+        if sglang_generation:
+            sglang_generation.shutdown()
+        gc.collect()
+        torch.cuda.empty_cache()
+
+
+def _wait_for_sglang_http_server_spinup(base_url: str):
+    """Poll ``/health_generate`` until HTTP 200; raise TimeoutError after 60s."""
+    import time
+
+    import requests
+
+    max_wait = 60  # seconds
+    deadline = time.monotonic() + max_wait  # monotonic: immune to clock changes
+    while time.monotonic() < deadline:
+        try:
+            response = requests.get(f"{base_url}/health_generate", timeout=5)
+            if response.status_code == 200:
+                return
+        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
+            pass  # server not reachable yet; retry after the sleep below
+        time.sleep(1)
+    raise TimeoutError(f"SGLang server at {base_url} did not start within {max_wait}s")
+
+
+@pytest.mark.sglang
+def test_sglang_http_server(cluster, tokenizer):
+    """Test that the SGLang HTTP server answers direct /generate requests.
+
+    The test posts input_ids plus sampling_params to the first server's /generate
+    endpoint and validates the status code and the logprob metadata it returns.
+    """
+    import requests
+
+    # Create SGLang config
+    sglang_config = deepcopy(basic_sglang_test_config)
+    sglang_config = configure_sglang_config(sglang_config, tokenizer, is_eval=True)
+
+    # Ensure correct model for reproducible output
+    assert sglang_config["model_name"] == "Qwen/Qwen3-0.6B", (
+        "Model name should be Qwen/Qwen3-0.6B to get expected output"
+    )
+
+    sglang_generation = None
+    try:
+        # Create SGLang generation (this starts the servers)
+        sglang_generation = SGLangGeneration(cluster, sglang_config)
+
+        # Get server URLs
+        base_urls = sglang_generation.get_sglang_server_urls()
+        print(f"SGLang server URLs: {base_urls}")
+        assert len(base_urls) >= 1, "Should have at least one SGLang server"
+
+        # Wait for server to be ready
+        _wait_for_sglang_http_server_spinup(base_urls[0])
+
+        # Prepare input - tokenize "count to 5"
+        test_prompt = "count to 5"
+        input_ids = tokenizer.encode(test_prompt, add_special_tokens=True)
+
+        # Build request payload for SGLang /generate endpoint
+        payload = {
+            "input_ids": input_ids,
+            "sampling_params": {
+                "temperature": 0.0,  # Greedy for determinism
+                "top_p": 1.0,
+                "max_new_tokens": 5,
+            },
+            "return_logprob": True,
+        }
+
+        # Make request to SGLang server
+        response = requests.post(
+            url=f"{base_urls[0]}/generate",
+            json=payload,
+            headers={"Content-Type": "application/json"},
+            timeout=30,
+        )
+        # Check the status before decoding: an error body may not be valid JSON
+        assert response.status_code == 200, f"Expected 200, got {response.status_code}"
+        actual_result = response.json()
+        print(f"SGLang response: {actual_result}")
+
+        assert "meta_info" in actual_result, "Response should contain meta_info"
+
+        meta_info = actual_result["meta_info"]
+        assert "output_token_logprobs" in meta_info, (
+            "meta_info should contain output_token_logprobs"
+        )
+
+        # Verify we got some generated tokens
+        output_token_logprobs = meta_info["output_token_logprobs"]
+        assert len(output_token_logprobs) > 0, (
+            "Should have generated at least one token"
+        )
+
+        # Each entry should start with [logprob, token_id]
+        first_token_info = output_token_logprobs[0]
+        assert len(first_token_info) >= 2, (
+            "Each token info should have logprob and token_id"
+        )
+
+        logprob = first_token_info[0]
+        token_id = first_token_info[1]
+        assert isinstance(logprob, float), "Logprob should be a float"
+        assert isinstance(token_id, int), "Token ID should be an int"
+
+        print(f"First generated token: id={token_id}, logprob={logprob}")
+
+        # Decode the generated tokens and log the text (content is not asserted)
+        generated_token_ids = [item[1] for item in output_token_logprobs]
+        generated_text = tokenizer.decode(generated_token_ids, skip_special_tokens=True)
+        print(f"Generated text: {generated_text}")
+
+    finally:
+        # Clean up
+        if sglang_generation:
+            sglang_generation.shutdown()
+        gc.collect()
+        torch.cuda.empty_cache()
+
+
+@pytest.mark.sglang
+@pytest.mark.timeout(180)
+def test_sglang_non_divisible_batch_handling(policy):
+    """Check that a one-sample batch yields outputs with batch dimension 1."""
+    single_sample_batch = BatchedDataDict(
+        {
+            "input_ids": torch.zeros((1, 1), dtype=torch.long),
+            "input_lengths": torch.ones(1, dtype=torch.long),
+        }
+    )
+
+    outputs = policy.generate(single_sample_batch)
+
+    expected_fields = [
+        "output_ids",
+        "logprobs",
+        "generation_lengths",
+        "unpadded_sequence_lengths",
+    ]
+    fields_present = all(field in outputs for field in expected_fields)
+    assert fields_present, "Missing required output fields"
+
+    # Every expected field must keep the single-sample batch dimension
+    batch_dims_ok = all(outputs[field].shape[0] == 1 for field in expected_fields)
+    assert batch_dims_ok, "Output tensors should have batch dimension of 1"
+
+
+# =============================================================================
+# Policy Integration Tests
+# =============================================================================
+
+
+@pytest.mark.sglang
+@pytest.mark.timeout(300)
+def test_sglang_generation_with_hf_training_colocated(cluster, tokenizer):
+    """Refit a colocated SGLang generator from a DTensor policy, then generate."""
+    from nemo_rl.algorithms.grpo import refit_policy_generation
+    from nemo_rl.models.policy.lm_policy import Policy
+
+    sglang_config = deepcopy(basic_sglang_test_config)
+    sglang_config = configure_sglang_config(sglang_config, tokenizer)
+
+    dtensor_config = deepcopy(basic_dtensor_test_config)
+    dtensor_config["train_global_batch_size"] = 4
+    # Use DTensorPolicyWorkerV2 for stream_weights_via_http
+    dtensor_config["dtensor_cfg"]["_v2"] = True
+
+    # Pre-declare both handles so the finally block can test them safely
+    sglang_policy = None
+    lm_policy = None
+
+    try:
+        print("Creating SGLang policy...")
+        sglang_policy = SGLangGeneration(cluster, sglang_config)
+        sglang_policy.finish_generation()
+
+        print("Creating DTensor policy...")
+        lm_policy = Policy(cluster, dtensor_config, tokenizer)
+
+        print("Preparing refit info...")
+        refit_info = lm_policy.prepare_refit_info()
+        sglang_policy.prepare_refit_info(refit_info)
+
+        print("Refitting SGLang policy...")
+        refit_policy_generation(
+            lm_policy, sglang_policy, sglang_config["colocated"]["enabled"]
+        )
+
+        # Generate from two right-padded prompts after the refit
+        test_prompts = ["Hello, my name is", "The capital of France is"]
+        tokenized = tokenizer(
+            test_prompts,
+            padding="max_length",
+            max_length=20,
+            truncation=True,
+            return_tensors="pt",
+            padding_side="right",
+        )
+        batch = BatchedDataDict(
+            {
+                "input_ids": tokenized["input_ids"],
+                "input_lengths": tokenized["attention_mask"].sum(dim=1).to(torch.int32),
+            }
+        )
+
+        result = sglang_policy.generate(batch, greedy=True)
+        assert "output_ids" in result, "output_ids not found in generation output"
+
+        decoded_texts = tokenizer.batch_decode(
+            result["output_ids"], skip_special_tokens=True
+        )
+        print(f"Generated texts: {decoded_texts}")
+
+    finally:
+        if sglang_policy:
+            sglang_policy.shutdown()
+        if lm_policy and hasattr(lm_policy, "shutdown"):
+            lm_policy.shutdown()
+
+
+@pytest.mark.skip(reason="Non-colocated mode not implemented for SGLang")
+@pytest.mark.timeout(300)
+@pytest.mark.sglang
+def test_sglang_generation_with_hf_training_non_colocated(
+    policy_cluster_separate, tokenizer
+):
+    """Refit SGLang on its own cluster from a DTensor policy (currently skipped)."""
+    from nemo_rl.algorithms.grpo import refit_policy_generation
+    from nemo_rl.models.policy.lm_policy import Policy
+
+    generation_cluster_separate = get_generation_cluster_separate(2)
+
+    sglang_config = deepcopy(basic_sglang_test_config)
+    sglang_config = configure_sglang_config(sglang_config, tokenizer)
+    sglang_config["colocated"]["enabled"] = False
+
+    dtensor_config = deepcopy(basic_dtensor_test_config)
+    dtensor_config["generation"]["colocated"]["enabled"] = False
+    dtensor_config["train_global_batch_size"] = 4
+    # Use DTensorPolicyWorkerV2 for stream_weights_via_http
+    dtensor_config["dtensor_cfg"]["_v2"] = True
+
+    # Pre-declare both handles so the finally block can test them safely
+    sglang_policy = None
+    lm_policy = None
+
+    try:
+        print("Creating SGLang policy...")
+        sglang_policy = SGLangGeneration(generation_cluster_separate, sglang_config)
+        sglang_policy.finish_generation()
+
+        print("Creating DTensor policy...")
+        lm_policy = Policy(policy_cluster_separate, dtensor_config, tokenizer)
+
+        # Set up one collective spanning the training and inference workers
+        ip, port = policy_cluster_separate.get_master_address_and_port()
+        num_train_ranks = policy_cluster_separate.world_size()
+        num_inference_ranks = generation_cluster_separate.world_size()
+        total_ranks = num_train_ranks + num_inference_ranks
+
+        train_futures = lm_policy.init_collective(
+            ip, port, world_size=total_ranks, train_world_size=num_train_ranks
+        )
+        inference_futures = sglang_policy.init_collective(
+            ip, port, world_size=total_ranks, train_world_size=num_train_ranks
+        )
+        ray.get(train_futures + inference_futures)
+
+        # Share the policy's refit info with the generator
+        refit_info = lm_policy.prepare_refit_info()
+        sglang_policy.prepare_refit_info(refit_info)
+
+        print("Refitting SGLang policy...")
+        refit_policy_generation(lm_policy, sglang_policy, False)
+
+        # Generate from two right-padded prompts after the refit
+        test_prompts = ["Hello, my name is", "The capital of France is"]
+        tokenized = tokenizer(
+            test_prompts,
+            padding="max_length",
+            max_length=20,
+            truncation=True,
+            return_tensors="pt",
+            padding_side="right",
+        )
+        batch = BatchedDataDict(
+            {
+                "input_ids": tokenized["input_ids"],
+                "input_lengths": tokenized["attention_mask"].sum(dim=1).to(torch.int32),
+            }
+        )
+
+        result = sglang_policy.generate(batch, greedy=True)
+        assert "output_ids" in result, "output_ids not found in generation output"
+
+    finally:
+        if sglang_policy:
+            sglang_policy.shutdown()
+        if lm_policy and hasattr(lm_policy, "shutdown"):
+            lm_policy.shutdown()
+        try:
+            generation_cluster_separate.shutdown()
+        except Exception as e:
+            print(f"Error during generation_cluster_separate shutdown: {e}")
+
+
+@pytest.mark.sglang
+@pytest.mark.timeout(180)
+def test_sglang_weight_update_and_prefix_cache_reset(cluster, tokenizer):
+    """Check logprobs change after a weight update, then run a finish/prepare cycle."""
+    from nemo_rl.models.policy.lm_policy import Policy
+
+    sglang_config = deepcopy(basic_sglang_test_config)
+    sglang_config = configure_sglang_config(sglang_config, tokenizer, is_eval=True)
+
+    # NOTE(review): sibling tests enable dtensor_cfg._v2 for HTTP refit; confirm
+    dtensor_config = deepcopy(basic_dtensor_test_config)  # keep shared config intact
+
+    sglang_policy = None
+    lm_policy = None
+
+    try:
+        print("Creating DTensor policy...")
+        lm_policy = Policy(cluster, dtensor_config, tokenizer)
+
+        print("Creating SGLang policy...")
+        sglang_policy = SGLangGeneration(cluster, sglang_config)
+
+        print("Preparing refit info...")
+        state_dict_info = lm_policy.prepare_refit_info()
+        sglang_policy.prepare_refit_info(state_dict_info)
+
+        # Prepare input data: the same prompt twice, right-padded
+        text = "Answer the question. What is 2+2?"
+        test_prompt = [text, text]
+        encodings = tokenizer(
+            test_prompt,
+            padding=True,
+            return_tensors="pt",
+            padding_side="right",
+        )
+        input_ids = encodings["input_ids"]
+        input_lengths = encodings["attention_mask"].sum(dim=1).to(torch.int32)
+        test_input_data = BatchedDataDict(
+            {"input_ids": input_ids, "input_lengths": input_lengths}
+        )
+
+        print("Running Generation 1 (Initial)...")
+        sglang_policy.prepare_for_generation()
+        outputs1 = sglang_policy.generate(test_input_data, greedy=True)
+        logprob1 = outputs1["logprobs"][0, input_lengths[0]].item()
+        print(f"Logprob of first generated token (Run 1): {logprob1}")
+
+        print("Adding noise to weights in HF policy...")
+        ray.get(
+            [
+                worker._add_noise_to_weights.remote()
+                for worker in lm_policy.worker_group.workers
+            ]
+        )
+
+        print("Updating SGLang weights from DTensor policy via HTTP...")
+        # Get SGLang server URL to GPU UUID mapping
+        sglang_url_to_gpu_uuids = sglang_policy.get_sglang_url_to_gpu_uuids()
+        print(f"SGLang URL to GPU UUIDs: {sglang_url_to_gpu_uuids}")
+        # Stream weights via HTTP (CUDA IPC)
+        ray.get(lm_policy.stream_weights_via_http(sglang_url_to_gpu_uuids))
+
+        print("Running Generation 2 (Weights Updated)...")
+        outputs2 = sglang_policy.generate(test_input_data, greedy=True)
+        logprob2 = outputs2["logprobs"][0, input_lengths[0]].item()
+        print(f"Logprob of first generated token (Run 2): {logprob2}")
+        assert logprob2 != logprob1, "Logprobs should be different after weight update."
+
+        print("Resetting SGLang prefix cache...")
+        sglang_policy.finish_generation()
+        sglang_policy.prepare_for_generation()
+
+        print("Running Generation 3 (Cache Reset)...")
+        outputs3 = sglang_policy.generate(test_input_data, greedy=True)
+        logprob3 = outputs3["logprobs"][0, input_lengths[0]].item()
+        print(f"Logprob of first generated token (Run 3): {logprob3}")
+        # NOTE(review): logprob3 is only printed; no assertion checks the reset
+        print("Prefix cache reset verified successfully.")
+
+    finally:
+        print("Cleaning up resources...")
+        if sglang_policy:
+            sglang_policy.shutdown()
+        if lm_policy:
+            lm_policy.shutdown()
+        gc.collect()
+        torch.cuda.empty_cache()
diff --git a/tests/unit/models/generation/test_sglang_utils.py b/tests/unit/models/generation/test_sglang_utils.py
new file mode 100644
index 0000000000..396530edd7
--- /dev/null
+++ b/tests/unit/models/generation/test_sglang_utils.py
@@ -0,0 +1,49 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Unit tests for SGLang utilities.
+
+These tests cover AsyncLoopThread: running a coroutine and rejecting work after shutdown.
+"""
+
+import pytest
+
+from nemo_rl.models.generation.sglang.utils import AsyncLoopThread
+
+
+def test_async_loop_thread_run_returns_result():
+    # run() should hand back whatever value the coroutine returns
+    async def give_answer():
+        return 42
+
+    runner = AsyncLoopThread()
+    try:
+        assert runner.run(give_answer()) == 42
+    finally:
+        runner.shutdown()
+
+
+def test_async_loop_thread_run_when_stopped_raises():
+    async def never_scheduled():
+        return 1
+
+    stopped_runner = AsyncLoopThread()
+    stopped_runner.shutdown()
+
+    with pytest.raises(RuntimeError, match="Event loop is not running"):
+        pending = never_scheduled()
+        try:
+            stopped_runner.run(pending)
+        finally:
+            pending.close()  # avoid a "coroutine was never awaited" warning
diff --git a/tests/unit/models/generation/test_vllm_generation.py b/tests/unit/models/generation/test_vllm_generation.py
index 1599b7e703..87730c8908 100644
--- a/tests/unit/models/generation/test_vllm_generation.py
+++ b/tests/unit/models/generation/test_vllm_generation.py
@@ -178,6 +178,9 @@ def get_basic_megatron_test_config(
             "moe_router_load_balancing_type": "none",
             "moe_router_bias_update_rate": 0.0,
             "moe_permute_fusion": False,
+            "moe_enable_deepep": False,
+            "moe_token_dispatcher_type": "allgather",
+            "moe_shared_expert_overlap": False,
             "apply_rope_fusion": True,
             "bias_activation_fusion": True,
             "moe_per_layer_logging": False,
diff --git a/tests/unit/models/megatron/test_megatron_setup.py b/tests/unit/models/megatron/test_megatron_setup.py
new file mode 100644
index 0000000000..61c4bc7a75
--- /dev/null
+++ b/tests/unit/models/megatron/test_megatron_setup.py
@@ -0,0 +1,1139 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Unit tests for Megatron setup utilities.
+
+This module tests the configuration validation and setup functions in
+nemo_rl.models.megatron.setup, focusing on:
+- Configuration validation functions
+- Parallelism configuration application
+- Precision and dtype configuration
+- Checkpoint configuration creation
+- Model path validation
+"""
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+import torch
+
+
+@pytest.mark.mcore
+class TestValidateModelPaths:
+    """Exercise validate_model_paths with a patched Megatron checkpoint root."""
+
+    def test_model_name_is_hf_model(self, tmp_path):
+        """A HuggingFace hub id (not a local path) is given as the model name."""
+        from nemo_rl.models.megatron.setup import validate_model_paths
+
+        policy_cfg = {"model_name": "meta-llama/Llama-3.2-1B"}
+
+        with patch(
+            "nemo_rl.models.megatron.setup.get_megatron_checkpoint_dir",
+            return_value=str(tmp_path),
+        ):
+            result = validate_model_paths(policy_cfg)
+
+        hf_name, megatron_path, ckpt_found = result
+
+        assert hf_name == "meta-llama/Llama-3.2-1B"
+        assert megatron_path == f"{tmp_path}/meta-llama/Llama-3.2-1B"
+        assert ckpt_found is False
+
+    def test_model_name_is_local_path(self, tmp_path):
+        """An existing local directory is given as the model name."""
+        from nemo_rl.models.megatron.setup import validate_model_paths
+
+        model_dir = tmp_path / "local_model"
+        model_dir.mkdir()
+
+        policy_cfg = {"model_name": str(model_dir)}
+
+        with patch(
+            "nemo_rl.models.megatron.setup.get_megatron_checkpoint_dir",
+            return_value=str(tmp_path / "checkpoints"),
+        ):
+            result = validate_model_paths(policy_cfg)
+
+        hf_name, megatron_path, ckpt_found = result
+
+        assert hf_name == str(model_dir)
+        # Local path should be converted to model_<path> format
+        assert "model_" in megatron_path
+        assert ckpt_found is False
+
+    def test_checkpoint_exists(self, tmp_path):
+        """A pre-created iter_0000000 directory is reported as an existing checkpoint."""
+        from nemo_rl.models.megatron.setup import validate_model_paths
+
+        # Lay out <root>/test-model/iter_0000000 under the patched checkpoint root
+        ckpt_root = tmp_path / "checkpoints"
+        iteration_dir = ckpt_root / "test-model" / "iter_0000000"
+        iteration_dir.mkdir(parents=True)
+
+        policy_cfg = {"model_name": "test-model"}
+
+        with patch(
+            "nemo_rl.models.megatron.setup.get_megatron_checkpoint_dir",
+            return_value=str(ckpt_root),
+        ):
+            result = validate_model_paths(policy_cfg)
+
+        hf_name, _megatron_path, ckpt_found = result
+
+        assert hf_name == "test-model"
+        assert ckpt_found is True
+
+
+@pytest.mark.mcore
+class TestApplyParallelismConfig:
+    """Checks for _apply_parallelism_config."""
+
+    def test_basic_parallelism_config(self):
+        """TP/PP/SP/CP values from megatron_cfg end up on the model config."""
+        from nemo_rl.models.megatron.setup import _apply_parallelism_config
+
+        mock_model_cfg = MagicMock()
+        policy_cfg = {
+            "megatron_cfg": {
+                "tensor_model_parallel_size": 4,
+                "pipeline_model_parallel_size": 2,
+                "num_layers_in_first_pipeline_stage": None,
+                "num_layers_in_last_pipeline_stage": None,
+                "sequence_parallel": True,
+                "context_parallel_size": 1,
+            },
+            "sequence_packing": {"enabled": False},
+        }
+
+        _apply_parallelism_config(mock_model_cfg, policy_cfg)
+
+        assert mock_model_cfg.tensor_model_parallel_size == 4
+        assert mock_model_cfg.pipeline_model_parallel_size == 2
+        assert mock_model_cfg.sequence_parallel is True
+        assert mock_model_cfg.context_parallel_size == 1
+
+    def test_context_parallel_requires_sequence_packing(self):
+        """context_parallel_size > 1 without sequence packing must assert."""
+        from nemo_rl.models.megatron.setup import _apply_parallelism_config
+
+        mock_model_cfg = MagicMock()
+        policy_cfg = {
+            "megatron_cfg": {
+                "tensor_model_parallel_size": 1,
+                "pipeline_model_parallel_size": 1,
+                "num_layers_in_first_pipeline_stage": None,
+                "num_layers_in_last_pipeline_stage": None,
+                "sequence_parallel": False,
+                "context_parallel_size": 2,
+            },
+            "sequence_packing": {"enabled": False},
+        }
+
+        with pytest.raises(AssertionError) as raised:
+            _apply_parallelism_config(mock_model_cfg, policy_cfg)
+
+        assert "Sequence Packing must be enabled" in str(raised.value)
+
+    def test_context_parallel_with_sequence_packing(self):
+        """context_parallel_size > 1 is accepted once sequence packing is on."""
+        from nemo_rl.models.megatron.setup import _apply_parallelism_config
+
+        mock_model_cfg = MagicMock()
+        policy_cfg = {
+            "megatron_cfg": {
+                "tensor_model_parallel_size": 1,
+                "pipeline_model_parallel_size": 1,
+                "num_layers_in_first_pipeline_stage": None,
+                "num_layers_in_last_pipeline_stage": None,
+                "sequence_parallel": False,
+                "context_parallel_size": 4,
+            },
+            "sequence_packing": {"enabled": True},
+        }
+
+        _apply_parallelism_config(mock_model_cfg, policy_cfg)
+
+        assert mock_model_cfg.context_parallel_size == 4
+
+
+@pytest.mark.mcore
+class TestApplyMoeConfig:
+    """Checks for _apply_moe_config."""
+
+    def test_moe_configuration(self):
+        """Each MoE setting in megatron_cfg ends up on the model config."""
+        from nemo_rl.models.megatron.setup import _apply_moe_config
+
+        mock_model_cfg = MagicMock()
+        policy_cfg = {
+            "megatron_cfg": {
+                "expert_tensor_parallel_size": 2,
+                "expert_model_parallel_size": 4,
+                "moe_router_dtype": "float32",
+                "moe_router_load_balancing_type": "none",
+                "moe_router_bias_update_rate": 0.0,
+                "moe_permute_fusion": True,
+                "moe_enable_deepep": False,
+                "moe_token_dispatcher_type": "allgather",
+                "moe_shared_expert_overlap": True,
+            }
+        }
+
+        _apply_moe_config(mock_model_cfg, policy_cfg)
+
+        assert mock_model_cfg.expert_tensor_parallel_size == 2
+        assert mock_model_cfg.expert_model_parallel_size == 4
+        assert mock_model_cfg.moe_router_dtype == "float32"
+        assert mock_model_cfg.moe_router_load_balancing_type == "none"
+        assert mock_model_cfg.moe_router_bias_update_rate == 0.0
+        assert mock_model_cfg.moe_permute_fusion is True
+        assert mock_model_cfg.moe_enable_deepep is False
+        assert mock_model_cfg.moe_token_dispatcher_type == "allgather"
+        assert mock_model_cfg.moe_shared_expert_overlap is True
+
+
+@pytest.mark.mcore
+class TestApplyPrecisionConfig:
+    """Checks for _apply_precision_config."""
+
+    @pytest.mark.parametrize(
+        "dtype,expected_bf16,expected_fp16,expected_params_dtype",
+        [
+            (torch.bfloat16, True, False, torch.bfloat16),
+            (torch.float16, False, True, torch.float16),
+            (torch.float32, False, False, torch.float32),
+        ],
+        ids=["bfloat16", "float16", "float32"],
+    )
+    def test_precision_configurations(
+        self, dtype, expected_bf16, expected_fp16, expected_params_dtype
+    ):
+        """The bf16/fp16 flags and params_dtype follow the requested dtype."""
+        from nemo_rl.models.megatron.setup import _apply_precision_config
+
+        mock_model_cfg = MagicMock()
+        mock_model_cfg.bf16 = False
+        mock_model_cfg.fp16 = False
+        policy_cfg = {
+            "megatron_cfg": {
+                "pipeline_dtype": "bfloat16",
+            }
+        }
+
+        _apply_precision_config(mock_model_cfg, policy_cfg, dtype)
+
+        assert mock_model_cfg.bf16 == expected_bf16
+        assert mock_model_cfg.fp16 == expected_fp16
+        assert mock_model_cfg.params_dtype == expected_params_dtype
+
+    def test_pipeline_dtype_mapping(self):
+        """Each pipeline_dtype string is translated to the matching torch dtype."""
+        from nemo_rl.models.megatron.setup import _apply_precision_config
+
+        mock_model_cfg = MagicMock()
+        mock_model_cfg.bf16 = False
+        mock_model_cfg.fp16 = False
+
+        # Config string -> torch dtype expected on model_cfg.pipeline_dtype
+        pipeline_dtypes = {
+            "float32": torch.float32,
+            "bfloat16": torch.bfloat16,
+            "float16": torch.float16,
+        }
+        for dtype_name, torch_dtype in pipeline_dtypes.items():
+            policy_cfg = {
+                "megatron_cfg": {"pipeline_dtype": dtype_name},
+            }
+            _apply_precision_config(mock_model_cfg, policy_cfg, torch.float32)
+            assert mock_model_cfg.pipeline_dtype == torch_dtype
+
+
+@pytest.mark.mcore
+class TestApplyPerformanceConfig:
+    """Checks for _apply_performance_config."""
+
+    def test_basic_performance_config(self):
+        """Fusion flags are copied over and parallel_output is switched on."""
+        from nemo_rl.models.megatron.setup import _apply_performance_config
+
+        mock_model_cfg = MagicMock()
+        mock_model_cfg.gated_linear_unit = True
+        policy_cfg = {
+            "megatron_cfg": {
+                "activation_checkpointing": False,
+                "apply_rope_fusion": True,
+                "bias_activation_fusion": True,
+            }
+        }
+
+        _apply_performance_config(mock_model_cfg, policy_cfg)
+
+        assert mock_model_cfg.parallel_output is True
+        assert mock_model_cfg.apply_rope_fusion is True
+        assert mock_model_cfg.bias_activation_fusion is True
+
+    def test_activation_checkpointing_enabled(self):
+        """Activation checkpointing selects full/uniform recompute, one layer."""
+        from nemo_rl.models.megatron.setup import _apply_performance_config
+
+        mock_model_cfg = MagicMock()
+        mock_model_cfg.gated_linear_unit = True
+        policy_cfg = {
+            "megatron_cfg": {
+                "activation_checkpointing": True,
+                "apply_rope_fusion": False,
+                "bias_activation_fusion": False,
+            }
+        }
+
+        _apply_performance_config(mock_model_cfg, policy_cfg)
+
+        assert mock_model_cfg.recompute_granularity == "full"
+        assert mock_model_cfg.recompute_method == "uniform"
+        assert mock_model_cfg.recompute_num_layers == 1
+
+    def test_activation_func_required_when_not_gated(self):
+        """A non-gated model with activation_func=None must trip the assertion."""
+        from nemo_rl.models.megatron.setup import _apply_performance_config
+
+        mock_model_cfg = MagicMock()
+        mock_model_cfg.gated_linear_unit = False
+        mock_model_cfg.activation_func = None
+        policy_cfg = {
+            "megatron_cfg": {
+                "activation_checkpointing": False,
+                "apply_rope_fusion": False,
+                "bias_activation_fusion": False,
+            }
+        }
+
+        with pytest.raises(AssertionError) as raised:
+            _apply_performance_config(mock_model_cfg, policy_cfg)
+
+        assert "activation_func must be set" in str(raised.value)
+
+    def test_fp8_configuration(self):
+        """An enabled fp8_cfg is copied onto the model config."""
+        from nemo_rl.models.megatron.setup import _apply_performance_config
+
+        mock_model_cfg = MagicMock()
+        mock_model_cfg.gated_linear_unit = True
+        policy_cfg = {
+            "megatron_cfg": {
+                "activation_checkpointing": False,
+                "apply_rope_fusion": False,
+                "bias_activation_fusion": False,
+                "fp8_cfg": {
+                    "enabled": True,
+                    "fp8": "e4m3",
+                    "fp8_recipe": "default",
+                    "fp8_param": False,
+                },
+            }
+        }
+
+        _apply_performance_config(mock_model_cfg, policy_cfg)
+
+        assert mock_model_cfg.fp8 == "e4m3"
+        assert mock_model_cfg.fp8_recipe == "default"
+        assert mock_model_cfg.fp8_param is False
+
+    def test_fp8_param_warning(self):
+        """Enabling fp8_param must emit the NaN UserWarning."""
+        from nemo_rl.models.megatron.setup import _apply_performance_config
+
+        mock_model_cfg = MagicMock()
+        mock_model_cfg.gated_linear_unit = True
+        policy_cfg = {
+            "megatron_cfg": {
+                "activation_checkpointing": False,
+                "apply_rope_fusion": False,
+                "bias_activation_fusion": False,
+                "fp8_cfg": {
+                    "enabled": True,
+                    "fp8": "e4m3",
+                    "fp8_recipe": "default",
+                    "fp8_param": True,
+                },
+            }
+        }
+
+        with pytest.warns(UserWarning, match="fp8_param=True sometimes causes NaN"):
+            _apply_performance_config(mock_model_cfg, policy_cfg)
+
+
+@pytest.mark.mcore
+class TestValidateOptimizerConfig:
+    """Tests for _validate_optimizer_config (optimizer CPU-offload checks)."""
+
+    def test_cpu_offload_requires_full_fraction(self):
+        """Test that CPU offload with offload fraction 0.5 raises AssertionError."""
+        from nemo_rl.models.megatron.setup import _validate_optimizer_config
+
+        config = {
+            "megatron_cfg": {
+                "optimizer": {
+                    "optimizer_cpu_offload": True,
+                    "optimizer_offload_fraction": 0.5,
+                }
+            }
+        }
+
+        with pytest.raises(AssertionError) as exc_info:
+            _validate_optimizer_config(config)
+
+        assert "optimizer_offload_fraction=1.0" in str(exc_info.value)
+
+    def test_cpu_offload_with_full_fraction(self):
+        """Test that CPU offload with offload fraction 1.0 passes validation."""
+        from nemo_rl.models.megatron.setup import _validate_optimizer_config
+
+        config = {
+            "megatron_cfg": {
+                "optimizer": {
+                    "optimizer_cpu_offload": True,
+                    "optimizer_offload_fraction": 1.0,
+                }
+            }
+        }
+
+        # Passes if no exception propagates
+        _validate_optimizer_config(config)
+
+    def test_no_cpu_offload(self):
+        """Test that a partial offload fraction is accepted when offload is off."""
+        from nemo_rl.models.megatron.setup import _validate_optimizer_config
+
+        config = {
+            "megatron_cfg": {
+                "optimizer": {
+                    "optimizer_cpu_offload": False,
+                    "optimizer_offload_fraction": 0.5,  # Should be ignored
+                }
+            }
+        }
+
+        # Passes if no exception propagates
+        _validate_optimizer_config(config)
+
+
+@pytest.mark.mcore
+class TestValidateChunkingConfig:
+    """Tests for _validate_chunking_config (logprob chunking checks)."""
+
+    def test_logprob_chunk_requires_defer_fp32_logits(self):
+        """Test that chunking with defer_fp32_logits=False raises AssertionError."""
+        from nemo_rl.models.megatron.setup import _validate_chunking_config
+
+        config = {
+            "logprob_chunk_size": 1024,
+            "megatron_cfg": {
+                "defer_fp32_logits": False,
+            },
+        }
+
+        with pytest.raises(AssertionError) as exc_info:
+            _validate_chunking_config(config)
+
+        assert "defer_fp32_logits must be True" in str(exc_info.value)
+
+    def test_logprob_chunk_with_defer_fp32_logits(self):
+        """Test that chunking with defer_fp32_logits=True passes validation."""
+        from nemo_rl.models.megatron.setup import _validate_chunking_config
+
+        config = {
+            "logprob_chunk_size": 1024,
+            "megatron_cfg": {
+                "defer_fp32_logits": True,
+            },
+        }
+
+        # Passes if no exception propagates
+        _validate_chunking_config(config)
+
+    @pytest.mark.parametrize(
+        "logprob_chunk_size",
+        [None, 0, -1],
+        ids=["none", "zero", "negative"],
+    )
+    def test_no_chunking_skips_validation(self, logprob_chunk_size):
+        """Test that None, 0, and -1 chunk sizes do not trigger the assertion."""
+        from nemo_rl.models.megatron.setup import _validate_chunking_config
+
+        config = {
+            "logprob_chunk_size": logprob_chunk_size,
+            "megatron_cfg": {
+                "defer_fp32_logits": False,  # Doesn't matter when chunking is disabled
+            },
+        }
+
+        # Passes if no exception propagates
+        _validate_chunking_config(config)
+
+    def test_missing_logprob_chunk_size(self):
+        """Test that a config without a logprob_chunk_size key does not raise."""
+        from nemo_rl.models.megatron.setup import _validate_chunking_config
+
+        config = {
+            "megatron_cfg": {
+                "defer_fp32_logits": False,
+            },
+        }
+
+        # Passes if no exception propagates
+        _validate_chunking_config(config)
+
+
+@pytest.mark.mcore
+class TestCreateCheckpointConfig:
+    """Tests for _create_checkpoint_config function."""
+
+    def test_basic_checkpoint_config(self, tmp_path):
+        """Test the save/load paths and boolean flags of the created config."""
+        from nemo_rl.models.megatron.setup import _create_checkpoint_config
+
+        pretrained_path = str(tmp_path / "pretrained")
+        weights_path = str(tmp_path / "weights")
+
+        checkpoint_config = _create_checkpoint_config(pretrained_path, weights_path)
+
+        assert checkpoint_config.save == weights_path  # save and load share a path
+        assert checkpoint_config.load == weights_path
+        assert checkpoint_config.pretrained_checkpoint == pretrained_path
+        assert checkpoint_config.async_save is False
+        assert checkpoint_config.fully_parallel_save is True
+        assert checkpoint_config.fully_parallel_load is True
+        assert checkpoint_config.load_rng is False
+
+
+@pytest.mark.mcore
+class TestValidateTrainingConfig:
+    """Tests for _validate_training_config (train_iters and MoE aux-loss checks)."""
+
+    def test_train_iters_required(self):
+        """Test that a megatron_cfg without train_iters raises AssertionError."""
+        from nemo_rl.models.megatron.setup import _validate_training_config
+
+        model_cfg = MagicMock()
+        model_cfg.moe_router_load_balancing_type = "none"
+        model_cfg.moe_aux_loss_coeff = 0
+        config = {
+            "megatron_cfg": {},
+        }
+
+        with pytest.raises(AssertionError) as exc_info:
+            _validate_training_config(config, model_cfg)
+
+        assert "train_iters must be set" in str(exc_info.value)
+
+    def test_training_config_sets_required_flags(self):
+        """Test that calculate_per_token_loss and perform_initialization become True."""
+        from nemo_rl.models.megatron.setup import _validate_training_config
+
+        model_cfg = MagicMock()
+        model_cfg.moe_router_load_balancing_type = "none"
+        model_cfg.moe_aux_loss_coeff = 0
+        config = {
+            "megatron_cfg": {
+                "train_iters": 1000,
+            },
+        }
+
+        _validate_training_config(config, model_cfg)
+
+        assert model_cfg.calculate_per_token_loss is True
+        assert model_cfg.perform_initialization is True
+
+    def test_moe_aux_loss_not_supported(self):
+        """Test that aux_loss balancing with a non-zero coefficient raises."""
+        from nemo_rl.models.megatron.setup import _validate_training_config
+
+        model_cfg = MagicMock()
+        model_cfg.moe_router_load_balancing_type = "aux_loss"
+        model_cfg.moe_aux_loss_coeff = 0.1  # Non-zero
+        config = {
+            "megatron_cfg": {
+                "train_iters": 1000,
+            },
+        }
+
+        with pytest.raises(AssertionError) as exc_info:
+            _validate_training_config(config, model_cfg)
+
+        assert "MoE aux loss is currently not supported" in str(exc_info.value)
+
+    def test_moe_aux_loss_with_zero_coeff_is_ok(self):
+        """Test that aux_loss balancing with a zero coefficient does not raise."""
+        from nemo_rl.models.megatron.setup import _validate_training_config
+
+        model_cfg = MagicMock()
+        model_cfg.moe_router_load_balancing_type = "aux_loss"
+        model_cfg.moe_aux_loss_coeff = 0  # Zero is OK
+        config = {
+            "megatron_cfg": {
+                "train_iters": 1000,
+            },
+        }
+
+        # Passes if no exception propagates
+        _validate_training_config(config, model_cfg)
+
+
+@pytest.mark.mcore
+class TestValidateDtypeConfig:
+    """Tests for _validate_dtype_config (dtype vs. bf16/fp16 flag consistency)."""
+
+    def test_bfloat16_validation(self):
+        """Test that torch.bfloat16 passes when model_cfg.bf16 is True."""
+        from nemo_rl.models.megatron.setup import _validate_dtype_config
+
+        model_cfg = MagicMock()
+        model_cfg.bf16 = True
+        model_cfg.fp16 = False
+
+        optimizer_cfg = MagicMock()
+        optimizer_cfg.use_precision_aware_optimizer = False
+        optimizer_cfg.bf16 = False
+        optimizer_cfg.fp16 = False
+
+        # Passes if no exception propagates
+        _validate_dtype_config(torch.bfloat16, model_cfg, optimizer_cfg)
+
+    def test_bfloat16_model_flag_mismatch(self):
+        """Test bfloat16 validation fails when model.bf16=False."""
+        from nemo_rl.models.megatron.setup import _validate_dtype_config
+
+        model_cfg = MagicMock()
+        model_cfg.bf16 = False  # Mismatch: requested dtype is bfloat16
+        model_cfg.fp16 = False
+
+        optimizer_cfg = MagicMock()
+        optimizer_cfg.use_precision_aware_optimizer = False
+
+        with pytest.raises(AssertionError) as exc_info:
+            _validate_dtype_config(torch.bfloat16, model_cfg, optimizer_cfg)
+
+        assert "bf16=True must be set" in str(exc_info.value)
+
+    def test_bfloat16_with_precision_aware_optimizer(self):
+        """Test bfloat16 with precision aware optimizer requires optimizer.bf16=True."""
+        from nemo_rl.models.megatron.setup import _validate_dtype_config
+
+        model_cfg = MagicMock()
+        model_cfg.bf16 = True
+        model_cfg.fp16 = False
+
+        optimizer_cfg = MagicMock()
+        optimizer_cfg.use_precision_aware_optimizer = True
+        optimizer_cfg.bf16 = False  # Mismatch: precision-aware optimizer is on
+
+        with pytest.raises(AssertionError) as exc_info:
+            _validate_dtype_config(torch.bfloat16, model_cfg, optimizer_cfg)
+
+        assert "optimizer.bf16=True must be set" in str(exc_info.value)
+
+    def test_float16_validation(self):
+        """Test that torch.float16 passes when model_cfg.fp16 is True."""
+        from nemo_rl.models.megatron.setup import _validate_dtype_config
+
+        model_cfg = MagicMock()
+        model_cfg.bf16 = False
+        model_cfg.fp16 = True
+
+        optimizer_cfg = MagicMock()
+        optimizer_cfg.use_precision_aware_optimizer = False
+
+        # Passes if no exception propagates
+        _validate_dtype_config(torch.float16, model_cfg, optimizer_cfg)
+
+    def test_float16_model_flag_mismatch(self):
+        """Test float16 validation fails when model.fp16=False."""
+        from nemo_rl.models.megatron.setup import _validate_dtype_config
+
+        model_cfg = MagicMock()
+        model_cfg.bf16 = False
+        model_cfg.fp16 = False  # Mismatch: requested dtype is float16
+
+        optimizer_cfg = MagicMock()
+        optimizer_cfg.use_precision_aware_optimizer = False
+
+        with pytest.raises(AssertionError) as exc_info:
+            _validate_dtype_config(torch.float16, model_cfg, optimizer_cfg)
+
+        assert "fp16=True must be set" in str(exc_info.value)
+
+    def test_float32_validation(self):
+        """Test that torch.float32 passes when all bf16/fp16 flags are False."""
+        from nemo_rl.models.megatron.setup import _validate_dtype_config
+
+        model_cfg = MagicMock()
+        model_cfg.bf16 = False
+        model_cfg.fp16 = False
+
+        optimizer_cfg = MagicMock()
+        optimizer_cfg.bf16 = False
+        optimizer_cfg.fp16 = False
+
+        # Passes if no exception propagates
+        _validate_dtype_config(torch.float32, model_cfg, optimizer_cfg)
+
+    def test_float32_with_bf16_model_flag(self):
+        """Test float32 validation fails when model has bf16=True."""
+        from nemo_rl.models.megatron.setup import _validate_dtype_config
+
+        model_cfg = MagicMock()
+        model_cfg.bf16 = True  # Mismatch: requested dtype is float32
+        model_cfg.fp16 = False
+
+        optimizer_cfg = MagicMock()
+        optimizer_cfg.bf16 = False
+        optimizer_cfg.fp16 = False
+
+        with pytest.raises(AssertionError) as exc_info:
+            _validate_dtype_config(torch.float32, model_cfg, optimizer_cfg)
+
+        assert "bf16=False" in str(exc_info.value)
+
+    def test_float32_with_fp16_optimizer_flag(self):
+        """Test float32 validation fails when optimizer has fp16=True."""
+        from nemo_rl.models.megatron.setup import _validate_dtype_config
+
+        model_cfg = MagicMock()
+        model_cfg.bf16 = False
+        model_cfg.fp16 = False
+
+        optimizer_cfg = MagicMock()
+        optimizer_cfg.bf16 = False
+        optimizer_cfg.fp16 = True  # Mismatch: requested dtype is float32
+
+        with pytest.raises(AssertionError) as exc_info:
+            _validate_dtype_config(torch.float32, model_cfg, optimizer_cfg)
+
+        assert "optimizer" in str(exc_info.value).lower()
+
+
+@pytest.mark.mcore
+class TestValidateAndSetConfig:
+    """Tests for validate_and_set_config function."""
+
+    def test_reward_model_not_supported(self):
+        """Test that an enabled reward_model_cfg raises NotImplementedError."""
+        from nemo_rl.models.megatron.setup import validate_and_set_config
+
+        config = {
+            "reward_model_cfg": {"enabled": True},
+            "precision": "bfloat16",
+            "megatron_cfg": {
+                "optimizer": {
+                    "optimizer_cpu_offload": False,
+                },
+            },
+            "offload_optimizer_for_logprob": False,
+        }
+
+        with pytest.raises(NotImplementedError) as exc_info:
+            validate_and_set_config(
+                config=config,
+                rank=0,
+                hf_model_name="test-model",
+                pretrained_path="/path/to/model",
+                weights_path=None,
+                tokenizer=MagicMock(),
+            )
+
+        assert "Reward models are not yet supported" in str(exc_info.value)
+
+    def test_generation_colocation_detection(self):
+        """Test that generation.colocated.enabled=True is reflected in the result."""
+        # setup_model_config and calculate_padded_vocab_size are patched out below,
+        # so only the colocation flag on the returned runtime config is checked.
+        from nemo_rl.models.megatron.setup import validate_and_set_config
+
+        config = {
+            "generation": {
+                "colocated": {"enabled": True},
+            },
+            "precision": "bfloat16",
+            "megatron_cfg": {
+                "optimizer": {
+                    "optimizer_cpu_offload": False,
+                },
+                "tensor_model_parallel_size": 2,
+            },
+            "offload_optimizer_for_logprob": False,
+        }
+
+        # The function would fail on setup_model_config, but we test the initial parsing
+        with patch(
+            "nemo_rl.models.megatron.setup.setup_model_config"
+        ) as mock_setup_model_config:
+            mock_megatron_cfg = MagicMock()
+            mock_megatron_cfg.model.vocab_size = 32000
+            mock_setup_model_config.return_value = (mock_megatron_cfg, MagicMock())
+
+            with patch(
+                "nemo_rl.models.megatron.setup.calculate_padded_vocab_size",
+                return_value=32000,
+            ):
+                runtime_config = validate_and_set_config(
+                    config=config,
+                    rank=0,
+                    hf_model_name="test-model",
+                    pretrained_path="/path/to/model",
+                    weights_path=None,
+                    tokenizer=MagicMock(),
+                )
+
+                assert runtime_config.is_generation_colocated is True
+
+
+@pytest.mark.mcore
+class TestRuntimeConfigNamedTuple:
+    """Tests for RuntimeConfig named tuple."""
+
+    def test_runtime_config_fields(self):
+        """Test that RuntimeConfig accepts its fields and exposes them by name."""
+        from nemo_rl.models.megatron.config import RuntimeConfig
+
+        runtime_config = RuntimeConfig(
+            megatron_cfg=MagicMock(),
+            model_cfg=MagicMock(),
+            dtype=torch.bfloat16,
+            optimizer_cpu_offload=False,
+            offload_optimizer_for_logprob=True,
+            is_generation_colocated=True,
+            final_padded_vocab_size=32000,
+        )
+
+        assert runtime_config.dtype == torch.bfloat16  # mock-valued fields not checked
+        assert runtime_config.optimizer_cpu_offload is False
+        assert runtime_config.offload_optimizer_for_logprob is True
+        assert runtime_config.is_generation_colocated is True
+        assert runtime_config.final_padded_vocab_size == 32000
+
+
+@pytest.mark.mcore
+class TestModelAndOptimizerStateNamedTuple:
+    """Tests for ModelAndOptimizerState named tuple."""
+
+    def test_model_and_optimizer_state_fields(self):
+        """Test that ModelAndOptimizerState accepts its fields by keyword."""
+        from nemo_rl.models.megatron.config import ModelAndOptimizerState
+
+        state = ModelAndOptimizerState(
+            state=MagicMock(),
+            model=MagicMock(),
+            optimizer=MagicMock(),
+            scheduler=MagicMock(),
+            checkpointing_context={"test": "context"},
+            param_sync_func=lambda: None,
+        )
+
+        assert state.checkpointing_context == {"test": "context"}  # only two fields read
+        assert callable(state.param_sync_func)
+
+
+@pytest.mark.mcore
+class TestHandleModelImport:
+    """Tests for handle_model_import function."""
+
+    def test_skip_import_when_checkpoint_exists(self, tmp_path, capsys):
+        """Test that an existing checkpoint prints 'Checkpoint already exists'."""
+        from nemo_rl.models.megatron.setup import handle_model_import
+
+        pretrained_path = str(tmp_path / "model")
+        config = {"model_name": "test-model", "megatron_cfg": {}}
+
+        handle_model_import(
+            config, "test-model", pretrained_path, pt_checkpoint_exists=True
+        )
+
+        captured = capsys.readouterr()
+        assert "Checkpoint already exists" in captured.out
+
+    @patch("nemo_rl.models.megatron.setup.import_model_from_hf_name")
+    @patch("nemo_rl.models.megatron.setup.parallel_state")
+    def test_import_when_checkpoint_missing(self, mock_ps, mock_import, tmp_path):
+        """Test that the HF import is called once when no checkpoint exists."""
+        from nemo_rl.models.megatron.setup import handle_model_import
+
+        mock_ps.model_parallel_is_initialized.return_value = False
+
+        pretrained_path = str(tmp_path / "model")
+        config = {
+            "model_name": "test-model",
+            "megatron_cfg": {"some_config": "value"},
+            "hf_config_overrides": None,
+        }
+
+        handle_model_import(
+            config, "test-model", pretrained_path, pt_checkpoint_exists=False
+        )
+
+        mock_import.assert_called_once_with(
+            "test-model",
+            pretrained_path,
+            {"some_config": "value"},
+        )
+
+    @patch("nemo_rl.models.megatron.setup.import_model_from_hf_name")
+    @patch("nemo_rl.models.megatron.setup.parallel_state")
+    def test_reinitialize_parallel_state_after_import(
+        self, mock_ps, mock_import, tmp_path, capsys
+    ):
+        """Test that an initialized parallel state is destroyed after import."""
+        from nemo_rl.models.megatron.setup import handle_model_import
+
+        mock_ps.model_parallel_is_initialized.return_value = True
+
+        pretrained_path = str(tmp_path / "model")
+        config = {
+            "model_name": "test-model",
+            "megatron_cfg": {},
+            "hf_config_overrides": {},
+        }
+
+        handle_model_import(
+            config, "test-model", pretrained_path, pt_checkpoint_exists=False
+        )
+
+        mock_ps.destroy_model_parallel.assert_called_once()
+
+        captured = capsys.readouterr()
+        assert "Reinitializing model parallel" in captured.out
+
+
+@pytest.mark.mcore
+class TestSetupModelAndOptimizer:
+    """Tests for setup_model_and_optimizer function."""
+
+    @patch("nemo_rl.models.megatron.setup.GlobalState")
+    @patch("nemo_rl.models.megatron.setup.initialize_megatron")
+    @patch("nemo_rl.models.megatron.setup.set_jit_fusion_options")
+    @patch("nemo_rl.models.megatron.setup.init_checkpointing_context")
+    @patch("nemo_rl.models.megatron.setup.build_tokenizer")
+    @patch("nemo_rl.models.megatron.setup.get_model")
+    @patch("nemo_rl.models.megatron.setup.setup_optimizer")
+    @patch("nemo_rl.models.megatron.setup.checkpoint_exists")
+    @patch("nemo_rl.models.megatron.setup.MoEFloat16Module")
+    @patch("torch.distributed.all_reduce")
+    @patch("torch.distributed.barrier")
+    @patch("torch.tensor")
+    def test_setup_with_param_sync_and_frozen_moe_router(
+        self,
+        mock_tensor,
+        mock_barrier,
+        mock_all_reduce,
+        mock_custom_float16,
+        mock_checkpoint_exists,
+        mock_setup_optimizer,
+        mock_get_model,
+        mock_build_tokenizer,
+        mock_init_ckpt_context,
+        mock_set_jit,
+        mock_init_megatron,
+        mock_global_state,
+    ):
+        """Test param_sync_func wiring and the pre-wrap hook for MoE router freezing."""
+        from nemo_rl.models.megatron.setup import setup_model_and_optimizer
+
+        # Setup mocks
+        mock_state = MagicMock()
+        mock_state.start_time = 0.0
+        mock_global_state.return_value = mock_state
+
+        mock_megatron_cfg = MagicMock()
+        mock_megatron_cfg.ft = None
+        mock_megatron_cfg.model.vocab_size = 32000
+        mock_megatron_cfg.model.make_vocab_size_divisible_by = 128
+        mock_megatron_cfg.model.tensor_model_parallel_size = 1
+        # Enable param gather overlap
+        mock_megatron_cfg.ddp.overlap_param_gather = True
+        mock_megatron_cfg.ddp.align_param_gather = True
+        mock_megatron_cfg.checkpoint.load = None
+        mock_megatron_cfg.checkpoint.pretrained_checkpoint = None
+
+        mock_model_chunk = MagicMock()
+        mock_model_chunk.start_param_sync = MagicMock()
+        mock_model = [mock_model_chunk]
+        mock_get_model.return_value = mock_model
+
+        mock_optimizer = MagicMock()
+        mock_scheduler = MagicMock()
+        mock_setup_optimizer.return_value = (mock_optimizer, mock_scheduler)
+
+        mock_tensor_instance = MagicMock()
+        mock_tensor_instance.item.return_value = 0.0
+        mock_tensor.return_value = mock_tensor_instance
+
+        mock_checkpoint_exists.return_value = False
+
+        policy_cfg = {
+            "megatron_cfg": {
+                "freeze_moe_router": True,  # Enable MoE router freezing
+            }
+        }
+
+        result = setup_model_and_optimizer(
+            policy_cfg=policy_cfg,
+            megatron_cfg=mock_megatron_cfg,
+            load_optimizer=True,
+        )
+
+        # Verify get_model was called once (the mixed_precision_wrapper is not checked)
+        mock_get_model.assert_called_once()
+        call_kwargs = mock_get_model.call_args[1]  # keyword arguments of that call
+        # Check that pre_wrap_hook is not empty when freeze_moe_router is True
+        assert len(call_kwargs.get("pre_wrap_hook", [])) > 0
+
+        assert result.param_sync_func == mock_model_chunk.start_param_sync
+
+
+@pytest.mark.mcore
+class TestSetupReferenceModelState:
+    """Tests for setup_reference_model_state function."""
+
+    @patch("nemo_rl.models.megatron.setup.init_checkpointing_context")
+    @patch("nemo_rl.models.megatron.setup.GlobalState")
+    @patch("nemo_rl.models.megatron.setup.get_model")
+    @patch("nemo_rl.models.megatron.setup.checkpoint_exists")
+    @patch("nemo_rl.models.megatron.setup.load_checkpoint")
+    @patch("nemo_rl.models.megatron.setup.HAVE_FSDP2", False)
+    def test_setup_reference_model(
+        self,
+        mock_load_checkpoint,
+        mock_checkpoint_exists,
+        mock_get_model,
+        mock_global_state,
+        mock_init_ckpt_context,
+        capsys,
+    ):
+        """Test that an existing checkpoint is loaded and a CPU state dict returned."""
+        from nemo_rl.models.megatron.setup import setup_reference_model_state
+
+        # Setup mocks
+        mock_state = MagicMock()
+        mock_global_state.return_value = mock_state
+
+        mock_megatron_cfg = MagicMock()
+        mock_megatron_cfg.dist.use_torch_fsdp2 = False
+
+        # Create mock model with state dict
+        mock_model = MagicMock()
+        mock_model.state_dict.return_value = {
+            "layer1.weight": torch.tensor([1.0, 2.0]),
+            "layer1.bias": torch.tensor([0.1]),
+        }
+        mock_get_model.return_value = [mock_model]
+
+        mock_checkpoint_exists.return_value = True
+
+        config = {
+            "megatron_cfg": {
+                "freeze_moe_router": False,
+            }
+        }
+
+        result = setup_reference_model_state(
+            config=config,
+            megatron_cfg=mock_megatron_cfg,
+            pretrained_path="/path/to/pretrained",
+        )
+
+        # Verify checkpoint was loaded
+        mock_load_checkpoint.assert_called_once()
+
+        # Verify model was set to eval mode
+        mock_model.eval.assert_called_once()
+
+        # Verify a dict with both mocked state-dict keys is returned
+        assert isinstance(result, dict)
+        assert "layer1.weight" in result
+        assert "layer1.bias" in result
+
+        # Verify the returned weight tensor is on CPU (the bias is not checked)
+        assert result["layer1.weight"].device.type == "cpu"
+
+        captured = capsys.readouterr()
+        assert "Reference model loaded" in captured.out
+
+
+@pytest.mark.mcore
+class TestFinalizeMegatronSetup:
+    """Tests for finalize_megatron_setup function."""
+
+    @patch("nemo_rl.models.megatron.setup._update_model_config_funcs")
+    @patch("nemo_rl.models.megatron.setup.build_tokenizer")
+    @patch("nemo_rl.models.megatron.setup.AutoBridge")
+    def test_basic_finalize_setup(
+        self,
+        mock_auto_bridge,
+        mock_build_tokenizer,
+        mock_update_model_config,
+    ):
+        """Test the returned 4-tuple and helper calls of finalize_megatron_setup."""
+        from nemo_rl.models.megatron.setup import finalize_megatron_setup
+
+        # Setup mocks
+        mock_megatron_cfg = MagicMock()
+        mock_megatron_cfg.model.make_vocab_size_divisible_by = 128
+
+        mock_model = MagicMock()
+        mock_optimizer = MagicMock()
+
+        mock_worker_sharding = MagicMock()
+        mock_worker_sharding.get_axis_size.return_value = 4  # dp_size = 4
+
+        mock_tokenizer = MagicMock()
+        mock_build_tokenizer.return_value = mock_tokenizer
+
+        mock_bridge = MagicMock()
+        mock_auto_bridge.from_hf_pretrained.return_value = mock_bridge
+
+        config = {
+            "megatron_cfg": {
+                "tensor_model_parallel_size": 2,
+                "optimizer": {
+                    "use_distributed_optimizer": False,
+                },
+                "distributed_data_parallel_config": {
+                    "overlap_param_gather": False,
+                },
+            }
+        }
+
+        result = finalize_megatron_setup(
+            config=config,
+            megatron_cfg=mock_megatron_cfg,
+            hf_model_name="test-model",
+            worker_sharding_annotations=mock_worker_sharding,
+            model=mock_model,
+            optimizer=mock_optimizer,
+        )
+
+        # Verify the four returned values against the mocks configured above
+        megatron_tokenizer, megatron_bridge, should_disable_hook, dp_size = result
+        assert megatron_tokenizer == mock_tokenizer
+        assert megatron_bridge == mock_bridge
+        assert should_disable_hook is False
+        assert dp_size == 4
+
+        # Verify each patched helper was called exactly once
+        mock_update_model_config.assert_called_once()
+        mock_build_tokenizer.assert_called_once()
+        mock_auto_bridge.from_hf_pretrained.assert_called_once_with(
+            "test-model", trust_remote_code=True
+        )
diff --git a/tests/unit/models/policy/test_megatron_worker.py b/tests/unit/models/policy/test_megatron_worker.py
index 5f09460cfb..426c64a0d1 100644
--- a/tests/unit/models/policy/test_megatron_worker.py
+++ b/tests/unit/models/policy/test_megatron_worker.py
@@ -135,6 +135,9 @@ def create_megatron_test_config(
             "apply_rope_fusion": True,
             "bias_activation_fusion": True,
             "moe_per_layer_logging": False,
+            "moe_enable_deepep": False,
+            "moe_token_dispatcher_type": "allgather",
+            "moe_shared_expert_overlap": False,
             "defer_fp32_logits": defer_fp32_logits,
             "train_iters": 100,  # Required for Megatron training
             "optimizer": {
diff --git a/tests/unit/models/policy/test_policy_utils.py b/tests/unit/models/policy/test_policy_utils.py
new file mode 100644
index 0000000000..5fbcf8e86e
--- /dev/null
+++ b/tests/unit/models/policy/test_policy_utils.py
@@ -0,0 +1,224 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import types
+from unittest.mock import MagicMock
+
+import pytest
+import requests
+import torch
+
+from nemo_rl.models.policy import utils as policy_utils
+
+pytestmark = pytest.mark.sglang
+
+
+def test_setup_ipc_gather_group_returns_none_when_dist_uninit(monkeypatch):
+    monkeypatch.setattr(policy_utils.dist, "is_initialized", lambda: False)
+
+    group, src, ranks = policy_utils._setup_ipc_gather_group(
+        rank=0,
+        current_device_uuid="uuid0",
+        sglang_gpu_uuids=["uuid0"],
+        sglang_url_to_gpu_uuids={"http://sglang": ["uuid0"]},
+    )
+
+    assert group is None  # all three results are None when dist is uninitialized
+    assert src is None
+    assert ranks is None
+
+
+def test_setup_ipc_gather_group_selects_matching_ranks(monkeypatch):
+    all_ranks = ["uuid0", "uuid1", "uuid2", "uuid3"]  # GPU UUID gathered per rank
+
+    monkeypatch.setattr(policy_utils.dist, "is_initialized", lambda: True)
+    monkeypatch.setattr(policy_utils.dist, "get_world_size", lambda: 4)
+    monkeypatch.setattr(policy_utils.dist, "get_rank", lambda: 1)
+
+    def fake_all_gather_object(output_list, _value):
+        for idx, item in enumerate(all_ranks):
+            output_list[idx] = item  # fill the caller's list in place
+
+    monkeypatch.setattr(policy_utils.dist, "all_gather_object", fake_all_gather_object)
+
+    group, src, ranks = policy_utils._setup_ipc_gather_group(
+        rank=1,
+        current_device_uuid="uuid1",
+        sglang_gpu_uuids=["uuid1", "uuid3"],
+        sglang_url_to_gpu_uuids={"http://sglang": ["uuid1", "uuid3"]},
+    )
+
+    assert group is None
+    assert src == 1
+    assert ranks == [1, 3]  # positions of uuid1 and uuid3 in all_ranks
+
+
+def test_gather_ipc_handlers_returns_filtered_on_src(monkeypatch):
+    handlers = ["h0", "h1", "h2", "h3"]  # one gathered handler per rank
+    monkeypatch.setattr(policy_utils.dist, "is_initialized", lambda: True)
+    monkeypatch.setattr(policy_utils.dist, "get_world_size", lambda: 4)
+
+    def fake_all_gather_object(output_list, _value):
+        for idx, item in enumerate(handlers):
+            output_list[idx] = item  # the value passed in is ignored by this fake
+
+    monkeypatch.setattr(policy_utils.dist, "all_gather_object", fake_all_gather_object)
+
+    gathered = policy_utils._gather_ipc_handlers(
+        serialized_handler="h1",
+        gather_group=None,
+        gather_src=0,
+        rank=0,
+        matching_ranks=[0, 2],
+    )
+
+    assert gathered == ["h0", "h2"]  # entries at matching_ranks [0, 2]
+
+
+def test_gather_ipc_handlers_non_src_returns_none(monkeypatch):
+    monkeypatch.setattr(policy_utils.dist, "is_initialized", lambda: True)
+    monkeypatch.setattr(policy_utils.dist, "get_world_size", lambda: 2)
+    monkeypatch.setattr(policy_utils.dist, "all_gather_object", lambda *_args: None)
+
+    gathered = policy_utils._gather_ipc_handlers(
+        serialized_handler="h1",
+        gather_group=None,
+        gather_src=0,
+        rank=1,  # differs from gather_src=0
+        matching_ranks=[0, 1],
+    )
+
+    assert gathered is None
+
+
+def test_send_tensor_to_sglang_http_error(monkeypatch):
+    response = MagicMock()  # returned by the patched requests.post below
+    response.raise_for_status.side_effect = requests.exceptions.HTTPError("boom")
+    response.status_code = 500
+    response.text = "error"
+    monkeypatch.setattr(
+        policy_utils.requests, "post", lambda *_args, **_kwargs: response
+    )
+
+    with pytest.raises(RuntimeError, match="Failed to send tensor 'w'"):
+        policy_utils._send_tensor_to_sglang(
+            url="http://sglang/update",
+            tensor_name="w",
+            gathered_handlers=["h0"],
+            shape=torch.Size([1]),
+            dtype="torch.float32",
+        )
+
+
+def test_send_tensor_to_sglang_generic_error(monkeypatch):
+    """A non-HTTP exception raised by `requests.post` surfaces as RuntimeError."""
+    failing_post = MagicMock(side_effect=RuntimeError("network down"))
+    monkeypatch.setattr(policy_utils.requests, "post", failing_post)
+
+    # The raised message is expected to name the tensor that failed to send.
+    with pytest.raises(RuntimeError, match="Failed to send tensor 'w'"):
+        policy_utils._send_tensor_to_sglang(
+            url="http://sglang/update",
+            tensor_name="w",
+            gathered_handlers=["h0"],
+            shape=torch.Size([1]),
+            dtype="torch.float32",
+        )
+
+
+def test_stream_weights_via_http_impl_no_matching_url(monkeypatch):
+    monkeypatch.setattr(policy_utils.torch.cuda, "empty_cache", lambda: None)
+    # "uuid1" is not listed under any server URL, so no server can be matched.
+    with pytest.raises(RuntimeError, match="No matching SGLang server"):
+        policy_utils.stream_weights_via_http_impl(
+            params_generator=iter([]),
+            sglang_url_to_gpu_uuids={"http://sglang": ["uuid0"]},
+            rank=0,
+            worker_name="worker",
+            current_device_uuid="uuid1",
+        )
+
+
+def test_stream_weights_via_http_impl_sends_tensors(monkeypatch):
+    def params_generator():
+        yield "w1", torch.tensor([1.0])
+        yield "w2", torch.tensor([2.0])
+
+    dummy_module = types.ModuleType(
+        "nemo_rl.models.generation.sglang.sglang_copied_utils"
+    )
+
+    class DummySerializer:
+        @staticmethod
+        def serialize(*_args, **_kwargs):
+            return "handler"
+
+    dummy_module.MultiprocessingSerializer = DummySerializer
+    monkeypatch.setitem(
+        sys.modules,
+        "nemo_rl.models.generation.sglang.sglang_copied_utils",
+        dummy_module,
+    )
+    monkeypatch.setattr(policy_utils.torch.cuda, "empty_cache", lambda: None)
+    monkeypatch.setattr(
+        policy_utils.torch.cuda,
+        "current_stream",
+        lambda: types.SimpleNamespace(synchronize=lambda: None),
+    )
+    monkeypatch.setattr(
+        policy_utils.torch.Tensor, "cuda", lambda self: self, raising=False
+    )
+    # Arguments of every intercepted _send_tensor_to_sglang call, in call order.
+    send_calls = []
+
+    def fake_send_tensor_to_sglang(
+        url, name, gathered_handlers, shape, dtype, flush_cache=False
+    ):
+        send_calls.append(
+            {
+                "url": url,
+                "name": name,
+                "handlers": gathered_handlers,
+                "shape": shape,
+                "dtype": dtype,
+                "flush_cache": flush_cache,
+            }
+        )
+
+    monkeypatch.setattr(
+        policy_utils,
+        "_setup_ipc_gather_group",
+        lambda *_args, **_kwargs: (None, 0, [0]),
+    )
+    monkeypatch.setattr(
+        policy_utils, "_gather_ipc_handlers", lambda *_args, **_kwargs: ["handler"]
+    )
+    monkeypatch.setattr(
+        policy_utils, "_send_tensor_to_sglang", fake_send_tensor_to_sglang
+    )
+    # Both URLs list "uuid0", the UUID passed as current_device_uuid.
+    policy_utils.stream_weights_via_http_impl(
+        params_generator=params_generator(),
+        sglang_url_to_gpu_uuids={
+            "http://sglang-a": ["uuid0"],
+            "http://sglang-b": ["uuid0"],
+        },
+        rank=0,
+        worker_name="worker",
+        current_device_uuid="uuid0",
+    )
+    # One send per yielded tensor, in order, each carrying the stubbed handlers.
+    assert [call["name"] for call in send_calls] == ["w1", "w2"]
+    assert all(call["handlers"] == ["handler"] for call in send_calls)
diff --git a/tests/unit/test_recipes_and_test_suites.py b/tests/unit/test_recipes_and_test_suites.py
index 88553567c7..7762e18988 100644
--- a/tests/unit/test_recipes_and_test_suites.py
+++ b/tests/unit/test_recipes_and_test_suites.py
@@ -28,6 +28,8 @@
 
 nightly_test_suite_path = os.path.join(test_suites_dir, "nightly.txt")
 release_test_suite_path = os.path.join(test_suites_dir, "release.txt")
+nightly_gb200_test_suite_path = os.path.join(test_suites_dir, "nightly_gb200.txt")
+release_gb200_test_suite_path = os.path.join(test_suites_dir, "release_gb200.txt")
 h100_performance_test_suite_path = os.path.join(test_suites_dir, "performance_h100.txt")
 gb200_performance_test_suite_path = os.path.join(
     test_suites_dir, "performance_gb200.txt"
@@ -72,6 +74,28 @@ def release_test_suite():
     return release_suite
 
 
+@pytest.fixture
+def nightly_gb200_test_suite():
+    """Entries of nightly_gb200.txt, skipping blank lines and `#` comments."""
+    with open(nightly_gb200_test_suite_path, "r") as f:
+        stripped = (raw_line.strip() for raw_line in f)
+        # `stripped` is lazy, so the list must be built while the file is open.
+        # Blank lines and `#` comment lines are not suite entries.
+        suite = [entry for entry in stripped if entry and not entry.startswith("#")]
+    return suite
+
+
+@pytest.fixture
+def release_gb200_test_suite():
+    """Entries of release_gb200.txt, skipping blank lines and `#` comments."""
+    with open(release_gb200_test_suite_path, "r") as f:
+        stripped = (raw_line.strip() for raw_line in f)
+        # `stripped` is lazy, so the list must be built while the file is open.
+        # Blank lines and `#` comment lines are not suite entries.
+        suite = [entry for entry in stripped if entry and not entry.startswith("#")]
+    return suite
+
+
 @pytest.fixture
 def performance_test_suite():
     performance_suite = []
@@ -92,9 +116,17 @@ def performance_test_suite():
 def all_test_suites(
     nightly_test_suite,
     release_test_suite,
+    nightly_gb200_test_suite,
+    release_gb200_test_suite,
     performance_test_suite,
 ):
-    return nightly_test_suite + release_test_suite + performance_test_suite
+    return (
+        nightly_test_suite
+        + release_test_suite
+        + nightly_gb200_test_suite
+        + release_gb200_test_suite
+        + performance_test_suite
+    )
 
 
 @pytest.fixture
@@ -112,12 +144,16 @@ def all_recipe_yaml_rel_paths():
     [
         nightly_test_suite_path,
         release_test_suite_path,
+        nightly_gb200_test_suite_path,
+        release_gb200_test_suite_path,
         h100_performance_test_suite_path,
         gb200_performance_test_suite_path,
     ],
     ids=[
         "nightly_test_suite",
         "release_test_suite",
+        "nightly_gb200_test_suite",
+        "release_gb200_test_suite",
         "h100_performance_test_suite",
         "gb200_performance_test_suite",
     ],
diff --git a/tests/unit/test_version_check.py b/tests/unit/test_version_check.py
index cdd8b7752f..26fb1cd496 100644
--- a/tests/unit/test_version_check.py
+++ b/tests/unit/test_version_check.py
@@ -355,7 +355,7 @@ def test_build_isolation_detected_during_uv_sync(self, dummy_project):
             cwd=dummy_project,
             capture_output=True,
             text=True,
-            timeout=60,
+            timeout=180,
             env=test_env,
         )
 
diff --git a/tests/unit/tools/test_config_cli.py b/tests/unit/tools/test_config_cli.py
index 63af6c8294..dcd94ccd64 100644
--- a/tests/unit/tools/test_config_cli.py
+++ b/tests/unit/tools/test_config_cli.py
@@ -21,6 +21,9 @@
 import pytest
 from omegaconf import OmegaConf
 
+# All tests in this module should run first
+pytestmark = pytest.mark.run_first
+
 
 def _load_cli_module() -> Any:
     # Use a path relative to this test file to import tools/config_cli.py
@@ -101,9 +104,10 @@ def test__ensure_defaults_relative_variants(cli: Any, tmp_path: Path) -> None:
     assert cfg3["defaults"][0] == rel
 
 
-def test_minimize_in_place_and_check(
+def test_minimize_in_place_and_check_with_explicit_base(
     cli: Any, tmp_path: Path, capsys: pytest.CaptureFixture[str]
 ) -> None:
+    """Test minimize with explicit --base option (rebase mode)."""
     base = tmp_path / "base.yaml"
     child = tmp_path / "child.yaml"
     base.write_text(
@@ -132,14 +136,14 @@ def test_minimize_in_place_and_check(
         ).strip()
     )
 
-    # Before minimizing, check should fail
+    # Before minimizing with explicit base, check should fail
     ns = type("NS", (), {"base": str(base), "config": str(child)})
     ret = cli.minimize_check(ns)
     assert ret == 1
     err = capsys.readouterr().err
     assert "Suggested fix" in err
 
-    # Minimize in place
+    # Minimize in place with explicit base
     ns2 = type("NS", (), {"base": str(base), "config": str(child), "in_place": True})
     ret2 = cli.minimize(ns2)
     assert ret2 == 0
@@ -278,3 +282,271 @@ def test_vendored_loader_drift_against_upstream_source() -> None:
     up_src = inspect.getsource(upstream_fn).strip()
     ven_src = inspect.getsource(vendored_fn).strip()
     assert up_src == ven_src
+
+
+def test_infer_base_from_defaults(cli: Any, tmp_path: Path) -> None:
+    """A string `defaults` is resolved against the child config's directory."""
+    parent = tmp_path / "parent.yaml"
+    child = tmp_path / "recipes" / "child.yaml"
+    child.parent.mkdir(parents=True, exist_ok=True)
+    parent.write_text("key: value\n")
+    child.write_text("defaults: ../parent.yaml\noverride: 1\n")
+    # The child lives in recipes/, so "../parent.yaml" must land on `parent`.
+    child_cfg = OmegaConf.load(child)
+    base_path = cli._infer_base_from_defaults(child.resolve(), child_cfg)
+    assert base_path == parent.resolve()
+
+
+def test_infer_base_from_defaults_missing_defaults(cli: Any, tmp_path: Path) -> None:
+    """A config without a `defaults` key raises ValueError."""
+    child = tmp_path / "child.yaml"
+    child.write_text("key: value\n")
+
+    child_cfg = OmegaConf.load(child)
+    with pytest.raises(ValueError, match="no 'defaults' key"):
+        cli._infer_base_from_defaults(child.resolve(), child_cfg)
+
+
+def test_infer_base_from_defaults_list_defaults(cli: Any, tmp_path: Path) -> None:
+    """A list-valued `defaults` raises ValueError (single inheritance only)."""
+    child = tmp_path / "child.yaml"
+    child.write_text("defaults:\n  - parent1.yaml\n  - parent2.yaml\nkey: value\n")
+
+    child_cfg = OmegaConf.load(child)
+    with pytest.raises(ValueError, match="list"):
+        cli._infer_base_from_defaults(child.resolve(), child_cfg)
+
+
+def test_minimize_inferred_base_preserves_chain_overrides(
+    cli: Any, tmp_path: Path
+) -> None:
+    """Test minimize with inferred base correctly handles child → parent → grandparent.
+
+    Scenario:
+      - grandparent.yaml: sets teacher.tp = 1
+      - parent.yaml (defaults: grandparent): sets teacher.tp = 4
+      - child.yaml (defaults: parent): sets teacher.tp = 2 (override back)
+
+    When minimizing child.yaml (with base inferred from defaults=parent.yaml),
+    the teacher.tp = 2 must be kept because it differs from the expanded parent (which has 4).
+    """
+    grandparent = tmp_path / "grandparent.yaml"
+    parent = tmp_path / "parent.yaml"
+    child = tmp_path / "child.yaml"
+
+    grandparent.write_text(
+        dedent(
+            """
+            teacher:
+              tp: 1
+              other: base_value
+            policy:
+              lr: 0.001
+            """
+        ).strip()
+    )
+    parent.write_text(
+        dedent(
+            """
+            defaults: grandparent.yaml
+            teacher:
+              tp: 4
+            """
+        ).strip()
+    )
+    child.write_text(
+        dedent(
+            """
+            defaults: parent.yaml
+            teacher:
+              tp: 2
+            custom: child_only
+            """
+        ).strip()
+    )
+
+    # Minimize child with inferred base (should use parent.yaml)
+    ns = type("NS", (), {"config": str(child), "base": None, "in_place": False})
+
+    # With in_place=False the minimized YAML is printed rather than written
+    # back, so capture stdout. A single captured call checks both the return
+    # code and the output, and redirect_stdout restores sys.stdout even if
+    # minimize() raises.
+    import contextlib
+    import io
+
+    captured = io.StringIO()
+    with contextlib.redirect_stdout(captured):
+        ret = cli.minimize(ns)
+    assert ret == 0
+    minimized = captured.getvalue()
+
+    # teacher.tp = 2 must be kept (differs from parent's expanded value of 4)
+    assert "tp: 2" in minimized
+    # custom key must be kept
+    assert "custom: child_only" in minimized
+    # defaults must be preserved as-is
+    assert "defaults: parent.yaml" in minimized
+
+
+def test_minimize_inferred_base_removes_redundant_keys(
+    cli: Any, tmp_path: Path
+) -> None:
+    """Test that keys matching the expanded parent are correctly removed."""
+    grandparent = tmp_path / "grandparent.yaml"
+    parent = tmp_path / "parent.yaml"
+    child = tmp_path / "child.yaml"
+
+    grandparent.write_text(
+        dedent(
+            """
+            common:
+              a: 1
+              b: 2
+            """
+        ).strip()
+    )
+    parent.write_text(
+        dedent(
+            """
+            defaults: grandparent.yaml
+            common:
+              b: 3
+            extra: from_parent
+            """
+        ).strip()
+    )
+    # Child redundantly sets common.b = 3 (same as parent) - should be removed
+    # Child sets common.a = 1 (same as grandparent, but parent doesn't override) - should be removed
+    # Child sets extra = from_parent (same as parent) - should be removed
+    # Child sets unique = child_only - should be kept
+    child.write_text(
+        dedent(
+            """
+            defaults: parent.yaml
+            common:
+              a: 1
+              b: 3
+            extra: from_parent
+            unique: child_only
+            """
+        ).strip()
+    )
+
+    import contextlib
+    import io
+
+    ns = type("NS", (), {"config": str(child), "base": None, "in_place": False})
+    captured = io.StringIO()
+    # redirect_stdout restores sys.stdout even if minimize() raises.
+    with contextlib.redirect_stdout(captured):
+        cli.minimize(ns)
+    minimized = captured.getvalue()
+
+    # Redundant keys should be removed
+    assert "a: 1" not in minimized
+    assert "b: 3" not in minimized
+    assert "extra: from_parent" not in minimized
+    # Unique key should be kept
+    assert "unique: child_only" in minimized
+    # defaults preserved
+    assert "defaults: parent.yaml" in minimized
+
+
+def test_minimize_check_inferred_base(cli: Any, tmp_path: Path) -> None:
+    """minimize-check with inferred base returns 0 when minimal and 1 otherwise."""
+    parent = tmp_path / "parent.yaml"
+    child = tmp_path / "child.yaml"
+
+    parent.write_text(
+        dedent(
+            """
+            common:
+              a: 1
+              b: 2
+            """
+        ).strip()
+    )
+    # Child is already minimal: common.b = 3 differs from the parent's 2
+    child.write_text(
+        dedent(
+            """
+            defaults: parent.yaml
+            common:
+              b: 3
+            """
+        ).strip()
+    )
+
+    ns = type("NS", (), {"config": str(child), "base": None})
+    ret = cli.minimize_check(ns)
+    assert ret == 0  # Already minimized
+
+    # Now add a redundant key (common.a = 1 matches the parent)
+    child.write_text(
+        dedent(
+            """
+            defaults: parent.yaml
+            common:
+              a: 1
+              b: 3
+            """
+        ).strip()
+    )
+
+    ret2 = cli.minimize_check(ns)
+    assert ret2 == 1  # Needs minimizing
+
+
+def test_minimize_with_explicit_base_rebases(cli: Any, tmp_path: Path) -> None:
+    """Test that --base option rebases the config to a different parent."""
+    grandparent = tmp_path / "grandparent.yaml"
+    parent = tmp_path / "parent.yaml"
+    child = tmp_path / "child.yaml"
+
+    grandparent.write_text(
+        dedent(
+            """
+            teacher:
+              tp: 1
+            policy:
+              lr: 0.001
+            """
+        ).strip()
+    )
+    parent.write_text(
+        dedent(
+            """
+            defaults: grandparent.yaml
+            teacher:
+              tp: 4
+            """
+        ).strip()
+    )
+    child.write_text(
+        dedent(
+            """
+            defaults: parent.yaml
+            teacher:
+              tp: 2
+            """
+        ).strip()
+    )
+
+    import contextlib
+    import io
+
+    # Minimize with explicit base=grandparent (rebase mode)
+    ns = type(
+        "NS", (), {"config": str(child), "base": str(grandparent), "in_place": False}
+    )
+    captured = io.StringIO()
+    # redirect_stdout restores sys.stdout even if minimize() raises.
+    with contextlib.redirect_stdout(captured):
+        cli.minimize(ns)
+    minimized = captured.getvalue()
+
+    # defaults should now point to grandparent
+    assert "grandparent.yaml" in minimized
+    # teacher.tp = 2 differs from grandparent's 1, so kept
+    assert "tp: 2" in minimized
diff --git a/tests/unit/utils/test_logger.py b/tests/unit/utils/test_logger.py
index d88137746a..52b380a213 100644
--- a/tests/unit/utils/test_logger.py
+++ b/tests/unit/utils/test_logger.py
@@ -1493,8 +1493,12 @@ def test_log_metrics(self, mock_tb_logger, mock_wandb_logger, temp_dir):
         logger.log_metrics(metrics, step)
 
         # Check that log_metrics was called on both loggers
-        mock_wandb_instance.log_metrics.assert_called_once_with(metrics, step, "", None)
-        mock_tb_instance.log_metrics.assert_called_once_with(metrics, step, "", None)
+        mock_wandb_instance.log_metrics.assert_called_once_with(
+            metrics, step, "", None, False
+        )
+        mock_tb_instance.log_metrics.assert_called_once_with(
+            metrics, step, "", None, False
+        )
 
     @patch("nemo_rl.utils.logger.WandbLogger")
     @patch("nemo_rl.utils.logger.TensorboardLogger")
@@ -1603,10 +1607,10 @@ def test_log_metrics_with_prefix_and_step_metric(
 
         # Check that log_metrics was called on both loggers with correct parameters
         mock_wandb_instance.log_metrics.assert_called_once_with(
-            metrics, step, prefix, step_metric
+            metrics, step, prefix, step_metric, False
         )
         mock_tb_instance.log_metrics.assert_called_once_with(
-            metrics, step, prefix, step_metric
+            metrics, step, prefix, step_metric, False
         )
 
     @patch("nemo_rl.utils.logger.WandbLogger")
@@ -1768,13 +1772,17 @@ def test_log_metrics_with_mlflow(
         logger.log_metrics(metrics, step)
 
         # Check that log_metrics was called on all loggers
-        mock_wandb_instance.log_metrics.assert_called_once_with(metrics, step, "", None)
+        mock_wandb_instance.log_metrics.assert_called_once_with(
+            metrics, step, "", None, False
+        )
         mock_swanlab_instance.log_metrics.assert_called_once_with(
-            metrics, step, "", None
+            metrics, step, "", None, False
+        )
+        mock_tb_instance.log_metrics.assert_called_once_with(
+            metrics, step, "", None, False
         )
-        mock_tb_instance.log_metrics.assert_called_once_with(metrics, step, "", None)
         mock_mlflow_instance.log_metrics.assert_called_once_with(
-            metrics, step, "", None
+            metrics, step, "", None, False
         )
 
     @patch("nemo_rl.utils.logger.WandbLogger")
diff --git a/tools/config_cli.py b/tools/config_cli.py
index 04780e7747..a0dbc9917a 100755
--- a/tools/config_cli.py
+++ b/tools/config_cli.py
@@ -21,10 +21,10 @@
 
 Subcommands:
   - expand: Resolve a config with OmegaConf interpolation and inheritance.
-  - minimize: Given a base config and a config, remove keys in the config that
-    are equal to the base, and ensure a defaults entry pointing to the base
-    exists. The defaults path in the resulting config is written relative to
-    the base config file.
+  - minimize: Remove keys in the config that are equal to what it inherits from
+    its `defaults` chain. By default, the base is inferred from the config's
+    `defaults` key (which must be a string, not a list). Optionally, pass
+    --base to override and rebase the config to a different parent.
   - minimize-check: Same args as `minimize` but only checks if minimization
     would change the file; exits non-zero if changes are needed.
 
@@ -37,40 +37,24 @@
   # Expand a config with a root level "defaults" key to see the full config; edit the config in place
   tools/config_cli.py expand examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml --in-place
 
-  # Minimize a config and remove all keys that are present in the base config; print to stdout
-  # tools/config_cli.py minimize <base_config> <config>
-  tools/config_cli.py minimize examples/configs/dpo.yaml examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml
-
-  # Minimize a config and remove all keys that are present in the base config; edit the config in place
-  # tools/config_cli.py minimize <base_config> <config>
-  tools/config_cli.py minimize examples/configs/dpo.yaml examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml --in-place
-
-  # Minimize all llm the configs:
-  for algo in grpo dpo sft distillation; do
-    base_config=examples/configs/${algo}.yaml
-    if [[ ${algo} == grpo ]]; then
-      base_config=examples/configs/grpo_math_1B.yaml
-    elif [[ ${algo} == distillation ]]; then
-      base_config=examples/configs/distillation_math.yaml
-    fi
-    for recipe in examples/configs/recipes/llm/${algo}-*.yaml; do
-      tools/config_cli.py minimize $base_config $recipe --in-place
-    done
-  done
+  # Minimize a config (base inferred from its defaults key); print to stdout
+  tools/config_cli.py minimize examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml
+
+  # Minimize a config in place
+  tools/config_cli.py minimize examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml --in-place
 
-  # Minimize vlm configs:
-  for recipe in examples/configs/recipes/vlm/vlm_grpo-*.yaml; do
-    tools/config_cli.py minimize examples/configs/vlm_grpo_3B.yaml $recipe --in-place
+  # Minimize with explicit base (rebase to a different parent)
+  tools/config_cli.py minimize examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml --base examples/configs/dpo.yaml --in-place
+
+  # Minimize all configs:
+  for recipe in examples/configs/recipes/{llm,vlm}/*.yaml; do
+    if ! tools/config_cli.py minimize-check $recipe 2>/dev/null; then
+      tools/config_cli.py minimize $recipe --in-place
+    fi
   done
 
   # Compare two configs
   tools/config_cli.py compare examples/configs/grpo_math_1B.yaml examples/configs/grpo_math_8B.yaml
-
-  # Minimize a config and compare it to not minimzing (should be the same)
-  tools/config_cli.py minimize examples/configs/dpo.yaml examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml >examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml.minimized
-  tools/config_cli.py compare \
-    examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml \
-    examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml.minimized
 """
 
 import argparse
@@ -312,40 +296,83 @@ def expand(args: argparse.Namespace) -> int:
     return 0
 
 
+def _infer_base_from_defaults(child_path: Path, child_cfg_raw: DictConfig) -> Path:
+    """Infer the base config path from the child's `defaults` key.
+
+    Args:
+        child_path: Resolved path to the child config
+        child_cfg_raw: Raw loaded child config
+
+    Returns:
+        Resolved path to the base config (`defaults` joined onto the child's directory)
+
+    Raises:
+        ValueError: If `defaults` is missing or is not a string (e.g. a list)
+    """
+    defaults = child_cfg_raw.get("defaults")
+    if defaults is None:
+        raise ValueError(
+            f"Config {child_path} has no 'defaults' key. "
+            "Either add a 'defaults' key or use --base to specify the base config."
+        )
+    if not isinstance(defaults, str):
+        raise ValueError(
+            f"Config {child_path} has 'defaults' as a non-string (e.g. a list), but only string "
+            "defaults are supported for minimize. Use a single defaults entry or pass --base."
+        )
+    return (child_path.parent / defaults).resolve()
+
+
 def minimize(args: argparse.Namespace) -> int:
     child_path = Path(args.config).resolve()
-    base_path = Path(args.base).resolve()
 
     child_cfg_raw = OmegaConf.load(child_path)
     if not isinstance(child_cfg_raw, DictConfig):
         raise TypeError(
             f"Config at {child_path} must be a mapping (DictConfig), got {type(child_cfg_raw)}"
         )
-    base_cfg_raw = OmegaConf.load(base_path)
-    if not isinstance(base_cfg_raw, DictConfig):
-        raise TypeError(
-            f"Config at {base_path} must be a mapping (DictConfig), got {type(base_cfg_raw)}"
-        )
 
-    # Resolve both before comparison
+    # Determine base: from --base arg or infer from defaults
+    if args.base:
+        base_path = Path(args.base).resolve()
+        base_inferred = False
+        # Load raw base for comparison
+        base_cfg_raw = OmegaConf.load(base_path)
+        if not isinstance(base_cfg_raw, DictConfig):
+            raise TypeError(
+                f"Config at {base_path} must be a mapping (DictConfig), got {type(base_cfg_raw)}"
+            )
+        base_resolved = OmegaConf.to_container(base_cfg_raw)
+    else:
+        base_path = _infer_base_from_defaults(child_path, child_cfg_raw)
+        base_inferred = True
+        # Load EXPANDED base (full inheritance chain) for proper comparison
+        base_resolved = OmegaConf.to_container(load_config(str(base_path)))
+
+    # Get child's explicit values (without defaults key for comparison)
     child_resolved = OmegaConf.to_container(child_cfg_raw)
-    base_resolved = OmegaConf.to_container(base_cfg_raw)
 
     if not isinstance(child_resolved, dict) or not isinstance(base_resolved, dict):
         raise TypeError("Both child and base configs must be mappings after resolution")
 
+    # Remove defaults from child before pruning (we'll handle it separately)
+    child_defaults = child_resolved.pop("defaults", None)
+
     pruned = _prune_equal(child_resolved, base_resolved)
 
     # Ensure mapping output
     if pruned is None or not isinstance(pruned, dict):
         pruned = {} if pruned is None else {"value": pruned}
 
-    # Ensure defaults reference base (relative path from child)
-    _ensure_defaults_relative(child_path, base_path, pruned)
-
-    # Ensure `defaults` appears first in the top-level mapping
-    if "defaults" in pruned:
-        pruned = {"defaults": pruned["defaults"], **pruned}
+    if base_inferred:
+        # Keep the existing defaults as-is
+        if child_defaults is not None:
+            pruned = {"defaults": child_defaults, **pruned}
+    else:
+        # Explicit base: update defaults to point to the new base
+        _ensure_defaults_relative(child_path, base_path, pruned)
+        if "defaults" in pruned:
+            pruned = {"defaults": pruned["defaults"], **pruned}
 
     # Emit
     text = OmegaConf.to_yaml(OmegaConf.create(pruned))
@@ -424,26 +451,43 @@ def minimize_check(args: argparse.Namespace) -> int:
     """Check if minimizing would change the file. Exit non-zero if so.
 
     Args (same as `minimize`):
-      base: Base config path
       config: Child config path
+      base: Optional base config path (inferred from defaults if not provided)
     """
     child_path = Path(args.config).resolve()
-    base_path = Path(args.base).resolve()
 
     # Compute minimized text (same as minimize())
     child_cfg_raw = OmegaConf.load(child_path)
-    base_cfg_raw = OmegaConf.load(base_path)
-    if not isinstance(child_cfg_raw, DictConfig) or not isinstance(
-        base_cfg_raw, DictConfig
-    ):
+    if not isinstance(child_cfg_raw, DictConfig):
         print(
-            f"[minimize-check] Both child and base must be mappings: {child_path} vs {base_path}",
+            f"[minimize-check] Config must be a mapping: {child_path}",
             file=sys.stderr,
         )
         return 2
 
+    # Determine base: from --base arg or infer from defaults
+    if args.base:
+        base_path = Path(args.base).resolve()
+        base_inferred = False
+        base_cfg_raw = OmegaConf.load(base_path)
+        if not isinstance(base_cfg_raw, DictConfig):
+            print(
+                f"[minimize-check] Base config must be a mapping: {base_path}",
+                file=sys.stderr,
+            )
+            return 2
+        base_resolved = OmegaConf.to_container(base_cfg_raw)
+    else:
+        try:
+            base_path = _infer_base_from_defaults(child_path, child_cfg_raw)
+        except ValueError as e:
+            print(f"[minimize-check] {e}", file=sys.stderr)
+            return 2
+        base_inferred = True
+        # Load EXPANDED base (full inheritance chain) for proper comparison
+        base_resolved = OmegaConf.to_container(load_config(str(base_path)))
+
     child_resolved = OmegaConf.to_container(child_cfg_raw)
-    base_resolved = OmegaConf.to_container(base_cfg_raw)
     if not isinstance(child_resolved, dict) or not isinstance(base_resolved, dict):
         print(
             f"[minimize-check] Both child and base must resolve to mappings: {child_path} vs {base_path}",
@@ -451,12 +495,23 @@ def minimize_check(args: argparse.Namespace) -> int:
         )
         return 2
 
+    # Remove defaults from child before pruning (we'll handle it separately)
+    child_defaults = child_resolved.pop("defaults", None)
+
     pruned = _prune_equal(child_resolved, base_resolved)
     if pruned is None or not isinstance(pruned, dict):
         pruned = {} if pruned is None else {"value": pruned}
-    _ensure_defaults_relative(child_path, base_path, pruned)
-    if "defaults" in pruned:
-        pruned = {"defaults": pruned["defaults"], **pruned}
+
+    if base_inferred:
+        # Keep the existing defaults as-is
+        if child_defaults is not None:
+            pruned = {"defaults": child_defaults, **pruned}
+    else:
+        # Explicit base: update defaults to point to the new base
+        _ensure_defaults_relative(child_path, base_path, pruned)
+        if "defaults" in pruned:
+            pruned = {"defaults": pruned["defaults"], **pruned}
+
     minimized_text = OmegaConf.to_yaml(OmegaConf.create(pruned))
 
     # Normalize current file via OmegaConf to reduce noise from formatting differences
@@ -466,9 +521,12 @@ def minimize_check(args: argparse.Namespace) -> int:
         current_norm_text = child_path.read_text()
 
     if current_norm_text != minimized_text:
+        suggested_cmd = f"tools/config_cli.py minimize {child_path} --in-place"
+        if args.base:
+            suggested_cmd = f"tools/config_cli.py minimize {child_path} --base {base_path} --in-place"
         print(
             f"[minimize-check] {child_path} is not minimized.\n"
-            f"  Suggested fix: tools/config_cli.py minimize {base_path} {child_path} --in-place",
+            f"  Suggested fix: {suggested_cmd}",
             file=sys.stderr,
         )
         return 1
@@ -492,10 +550,13 @@ def minimize_check(args: argparse.Namespace) -> int:
 
     p_min = sub.add_parser(
         "minimize",
-        help="Remove keys equal to base and ensure defaults reference base",
+        help="Remove keys equal to inherited values from defaults chain",
+    )
+    p_min.add_argument("config", help="Config file to minimize")
+    p_min.add_argument(
+        "--base",
+        help="Base config path (if not provided, inferred from config's defaults key)",
     )
-    p_min.add_argument("base", help="Base config path")
-    p_min.add_argument("config", help="Child config path")
     p_min.add_argument(
         "--in-place",
         action="store_true",
@@ -513,12 +574,13 @@ def minimize_check(args: argparse.Namespace) -> int:
 
     p_minchk = sub.add_parser(
         "minimize-check",
-        help=(
-            "Exit non-zero if minimizing would change the file; args mirror `minimize`"
-        ),
+        help="Exit non-zero if minimizing would change the file",
+    )
+    p_minchk.add_argument("config", help="Config file to check")
+    p_minchk.add_argument(
+        "--base",
+        help="Base config path (if not provided, inferred from config's defaults key)",
     )
-    p_minchk.add_argument("base", help="Base config path")
-    p_minchk.add_argument("config", help="Child config path")
     p_minchk.set_defaults(func=minimize_check)
 
     args = parser.parse_args()
diff --git a/tools/launch b/tools/launch
index 4c76651cea..47c06746b3 100755
--- a/tools/launch
+++ b/tools/launch
@@ -32,8 +32,8 @@ PROJECT_ROOT=$(realpath $SCRIPT_DIR/..)
 # Function to extract config from a script
 extract_config() {
     local script_path="$1"
-    local config=$(sed -n '/^# =\+ BEGIN CONFIG =\+/,/^# =\+ END CONFIG =\+/p' "$script_path" | 
-                   grep -v "^#" | 
+    local config=$(sed -n '/^# =\+ BEGIN CONFIG =\+/,/^# =\+ END CONFIG =\+/p' "$script_path" |
+                   grep -v "^#" |
                    grep "=" )
     if [[ -z "$config" ]]; then
         echo "[ERROR]: No config section found in script_path=$script_path"
@@ -41,6 +41,7 @@ extract_config() {
         echo
         echo "# ===== BEGIN CONFIG ====="
         echo "NUM_NODES=1        # How many nodes this job uses"
+        echo "GPUS_PER_NODE=8    # GPUs per node (8 for H100, 4 for GB200)"
         echo "STEPS_PER_RUN=60   # Approximately how many steps reached in one job"
         echo "MAX_STEPS=60       # Max training steps"
         echo 'NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN ))  # Round up'
@@ -90,6 +91,7 @@ MOUNTS=${MOUNTS:-}
 # DRYRUN=2 additionally creates the snapshots (helpful to run a hermetic example manually or share a repro)
 DRYRUN=${DRYRUN:-}
 IS_RELEASE=${IS_RELEASE:-}  # Adds extra configuration for wandb to track this in the right project
+EXCLUDE_GRES=${EXCLUDE_GRES:-} # If set, will not include --gres=gpu:$GPUS_PER_NODE in the sbatch invocation
 NOW=$(date '+%y%m%d-%H%M%S')
 
 if [[ -n "$MOUNTS" ]]; then
@@ -117,8 +119,11 @@ for SCRIPT in $SCRIPTS; do
     fi
     eval "$config"
 
+    # Default GPUS_PER_NODE to 8 if not specified in config (H100 default)
+    export GPUS_PER_NODE=${GPUS_PER_NODE:-8}
+
     # NUM_RUNS * NUM_NODES * NUM_GPUS * (NUM_MINUTES / 60)
-    gpu_hours=$((NUM_RUNS * NUM_NODES * 8 * NUM_MINUTES / 60))
+    gpu_hours=$((NUM_RUNS * NUM_NODES * GPUS_PER_NODE * NUM_MINUTES / 60))
     total_gpu_hours=$((total_gpu_hours + gpu_hours))
     echo "[INFO]: $gpu_hours GPUhrs to run $SCRIPT"
     if [[ "${DRYRUN}" -eq 1 ]]; then
@@ -127,7 +132,7 @@ for SCRIPT in $SCRIPTS; do
     fi
 
     rel_script=$(check_file_in_version_control_and_get_relpath_from_git_root $SCRIPT)
-    
+
     EXP_NAME=$(basename $SCRIPT .sh)
     SNAPSHOT_DIR=$(bash $PROJECT_ROOT/tools/code_snapshot.sh $EXP_NAME)
 
@@ -143,8 +148,14 @@ for SCRIPT in $SCRIPTS; do
                 logger.wandb.name=$(basename $SCRIPT .sh)-$(git rev-parse --short HEAD)
             )
         fi
-    
+
         # TODO: jq install is just to be backward compatible with older containers. Should eventually remove.
+
+        GRES_ARG="--gres=gpu:$GPUS_PER_NODE "
+        if [[ -n "${EXCLUDE_GRES}" ]]; then
+            GRES_ARG=""
+        fi
+
         cat <<EOF >$SNAPSHOT_DIR/continue.sh
 #!/bin/bash
 SCRIPT_DIR=\$( cd -- "\$( dirname -- "\${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
@@ -161,8 +172,7 @@ sbatch \\
     --account=$ACCOUNT \\
     --job-name=$ACCOUNT:${JOB_NAME}${SLURM_JOB_SUFFIX:-} \\
     --partition=$PARTITION \\
-    --time=0:${NUM_MINUTES}:0 \\
-    --gres=gpu:8 \\
+    --time=0:${NUM_MINUTES}:0 ${GRES_ARG}\\
     --output=slurm-${NOW}-%j-${JOB_NAME}-${i}.${NUM_RUNS}.out \\
     ray.sub
 EOF
diff --git a/uv.lock b/uv.lock
index 5818765dad..8d62191e17 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2,19 +2,186 @@ version = 1
 revision = 3
 requires-python = ">=3.12"
 resolution-markers = [
-    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
-    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
-    "python_full_version >= '3.13' and sys_platform == 'win32'",
-    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version >= '3.13' and sys_platform == 'darwin'",
-    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version < '3.13' and sys_platform == 'win32'",
-    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version < '3.13' and sys_platform == 'darwin'",
-    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
-    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
-]
+    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+]
+conflicts = [[
+    { package = "nemo-rl", extra = "fsdp" },
+    { package = "nemo-rl", extra = "sglang" },
+], [
+    { package = "nemo-rl", extra = "automodel" },
+    { package = "nemo-rl", extra = "sglang" },
+], [
+    { package = "nemo-rl", extra = "mcore" },
+    { package = "nemo-rl", extra = "sglang" },
+], [
+    { package = "nemo-rl", extra = "sglang" },
+    { package = "nemo-rl", extra = "vllm" },
+]]
 
 [manifest]
 members = [
@@ -28,13 +195,20 @@ members = [
 constraints = [
     { name = "aiohttp", specifier = ">=3.13.3" },
     { name = "brotli", specifier = ">=1.2.0" },
+    { name = "pyasn1", specifier = ">=0.6.2" },
     { name = "starlette", specifier = ">=0.49.1" },
     { name = "urllib3", specifier = ">=2.6.3" },
+    { name = "wheel", specifier = ">=0.46.2" },
 ]
 overrides = [
+    { name = "llguidance", specifier = ">=1.3.0,<1.4.0" },
     { name = "nvidia-modelopt", extras = ["torch"], specifier = ">=0.39.0" },
     { name = "opencv-python-headless", specifier = ">=4.11.0" },
+    { name = "setuptools", specifier = ">=80.10.2" },
     { name = "timm", specifier = "<=1.0.22" },
+    { name = "torch", marker = "sys_platform != 'darwin'", specifier = "==2.9.0", index = "https://download.pytorch.org/whl/cu129" },
+    { name = "torch", marker = "sys_platform == 'darwin'", specifier = "==2.9.0", index = "https://pypi.org/simple" },
+    { name = "torchaudio", specifier = "==2.9.0" },
     { name = "transformer-engine", extras = ["pytorch"], specifier = "==2.8.0" },
 ]
 
@@ -67,6 +241,11 @@ name = "nv-grouped-gemm"
 version = "1.1.4.post7"
 requires-dist = ["setuptools", "wheel", "torch", "numpy"]
 
+[[manifest.dependency-metadata]]
+name = "sgl-kernel"
+version = "0.3.20"
+requires-dist = ["torch", "scikit-build-core", "wheel"]
+
 [[package]]
 name = "absl-py"
 version = "2.3.1"
@@ -87,8 +266,8 @@ dependencies = [
     { name = "psutil" },
     { name = "pyyaml" },
     { name = "safetensors" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/f7/66/be171836d86dc5b8698b3a9bf4b9eb10cb53369729939f88bf650167588b/accelerate-1.10.0.tar.gz", hash = "sha256:8270568fda9036b5cccdc09703fef47872abccd56eb5f6d53b54ea5fb7581496", size = 392261, upload-time = "2025-08-07T10:54:51.664Z" }
 wheels = [
@@ -107,12 +286,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/8d/3f/95338030883d8c8b91223b4e21744b04d11b161a3ef117295d8241f50ab4/accessible_pygments-0.0.5-py3-none-any.whl", hash = "sha256:88ae3211e68a1d0b011504b2ffc1691feafce124b845bd072ab6f9f66f34d4b7", size = 1395903, upload-time = "2024-05-10T11:23:08.421Z" },
 ]
 
-[[package]]
-name = "accumulation-tree"
-version = "0.6.4"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/ac/dc/4ffda8a22b6af3f41bcec07ddfebe723218976eaa016cefbc904634a4e85/accumulation_tree-0.6.4.tar.gz", hash = "sha256:5f907667e4106b5ba140b6b871e1902eb2a93d429b92f8a9f7ddb2bee7704334", size = 12635, upload-time = "2024-09-26T21:50:40.627Z" }
-
 [[package]]
 name = "aiobotocore"
 version = "2.24.3"
@@ -261,13 +434,22 @@ version = "1.4.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "frozenlist" },
-    { name = "typing-extensions", marker = "python_full_version < '3.13'" },
+    { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/61/62/06741b579156360248d1ec624842ad0edf697050bbaf7c3e46394e106ad1/aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7", size = 25007, upload-time = "2025-07-03T22:54:43.528Z" }
 wheels = [
     { url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload-time = "2025-07-03T22:54:42.156Z" },
 ]
 
+[[package]]
+name = "airportsdata"
+version = "20250909"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/6d/c6/17ae8a65f7fa5bbbeee166f8070063eb8b70c89501a65c2e6885db61fc08/airportsdata-20250909.tar.gz", hash = "sha256:f39974fe1101817ced4ccf7c6ed336408469e5e778395d0a3e7a5112ec298f90", size = 907204, upload-time = "2025-09-09T01:07:31.256Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/16/87/59b570b9c4b014532777dc3baffc9bea10cf0cc8b232cf3c17e4bd0754a6/airportsdata-20250909-py3-none-any.whl", hash = "sha256:ce7dc6e1485afe3915e708212c7024ad158470c1c934e6a6cb217cf28b798ac7", size = 914391, upload-time = "2025-09-09T01:07:29.364Z" },
+]
+
 [[package]]
 name = "alabaster"
 version = "1.0.0"
@@ -341,7 +523,7 @@ source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "idna" },
     { name = "sniffio" },
-    { name = "typing-extensions", marker = "python_full_version < '3.13'" },
+    { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/f1/b4/636b3b65173d3ce9a38ef5f0522789614e590dab6a8d505340a4efe4c567/anyio-4.10.0.tar.gz", hash = "sha256:3f3fae35c96039744587aa5b8371e7e8e603c0702999535961dd336026973ba6", size = 213252, upload-time = "2025-08-04T08:54:26.451Z" }
 wheels = [
@@ -471,8 +653,8 @@ name = "audioread"
 version = "3.1.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "standard-aifc", marker = "python_full_version >= '3.13'" },
-    { name = "standard-sunau", marker = "python_full_version >= '3.13'" },
+    { name = "standard-aifc", marker = "python_full_version >= '3.13' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "standard-sunau", marker = "python_full_version >= '3.13' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/a1/4a/874ecf9b472f998130c2b5e145dcdb9f6131e84786111489103b66772143/audioread-3.1.0.tar.gz", hash = "sha256:1c4ab2f2972764c896a8ac61ac53e261c8d29f0c6ccd652f84e18f08a4cab190", size = 20082, upload-time = "2025-10-26T19:44:13.484Z" }
 wheels = [
@@ -760,6 +942,20 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/f5/10/56978295c14794b2c12007b07f3e41ba26acda9257457d7085b0bb3bb90c/brotli-1.2.0-cp314-cp314-win_amd64.whl", hash = "sha256:e7c0af964e0b4e3412a0ebf341ea26ec767fa0b4cf81abb5e897c9338b5ad6a3", size = 375639, upload-time = "2025-11-05T18:38:55.67Z" },
 ]
 
+[[package]]
+name = "build"
+version = "1.4.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "colorama", marker = "os_name == 'nt' and sys_platform != 'darwin' and sys_platform != 'linux'" },
+    { name = "packaging" },
+    { name = "pyproject-hooks" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/42/18/94eaffda7b329535d91f00fe605ab1f1e5cd68b2074d03f255c7d250687d/build-1.4.0.tar.gz", hash = "sha256:f1b91b925aa322be454f8330c6fb48b465da993d1e7e7e6fa35027ec49f3c936", size = 50054, upload-time = "2026-01-08T16:41:47.696Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c5/0d/84a4380f930db0010168e0aa7b7a8fed9ba1835a8fbb1472bc6d0201d529/build-1.4.0-py3-none-any.whl", hash = "sha256:6a07c1b8eb6f2b311b96fcbdbce5dab5fe637ffda0fd83c9cac622e927501596", size = 24141, upload-time = "2026-01-08T16:41:46.453Z" },
+]
+
 [[package]]
 name = "cachetools"
 version = "5.5.2"
@@ -776,8 +972,8 @@ source = { git = "https://github.com/Dao-AILab/causal-conv1d?rev=67e0a9dfe1518fc
 dependencies = [
     { name = "ninja" },
     { name = "packaging" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 
 [[package]]
@@ -827,7 +1023,7 @@ name = "cffi"
 version = "2.0.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "pycparser", marker = "implementation_name != 'PyPy'" },
+    { name = "pycparser", marker = "implementation_name != 'PyPy' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" }
 wheels = [
@@ -935,7 +1131,7 @@ name = "click"
 version = "8.2.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "colorama", marker = "sys_platform == 'win32'" },
+    { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/60/6c/8ca2efa64cf75a977a0d7fac081354553ebe483345c734fb6b6515d96bbc/click-8.2.1.tar.gz", hash = "sha256:27c491cc05d968d271d5a1db13e3b5a184636d9d930f148c50b038f0d0646202", size = 286342, upload-time = "2025-05-20T23:19:49.832Z" }
 wheels = [
@@ -974,7 +1170,7 @@ name = "colorful"
 version = "0.5.7"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "colorama", marker = "sys_platform == 'win32'" },
+    { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/0c/0c/d180ebf230b771907f46981023a80f62cf592d49673cc5f8a5993aa67bb6/colorful-0.5.7.tar.gz", hash = "sha256:c5452179b56601c178b03d468a5326cc1fe37d9be81d24d0d6bdab36c4b93ad8", size = 209487, upload-time = "2025-06-30T15:24:03.936Z" }
 wheels = [
@@ -988,8 +1184,8 @@ source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "loguru" },
     { name = "pydantic" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-automodel') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "transformers" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/a2/79/4c5c1cd14266f8cf2650bdb940f986ce7fcaeb56aad8cfa9e9afedf14e2f/compressed_tensors-0.12.2.tar.gz", hash = "sha256:5bb40856dd17f128ab73557ecc73799f80db4dd82fab6de875f1e6899b9ea0c4", size = 190409, upload-time = "2025-10-07T14:30:59.302Z" }
@@ -1132,7 +1328,7 @@ name = "cryptography"
 version = "46.0.3"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "cffi", marker = "platform_python_implementation != 'PyPy'" },
+    { name = "cffi", marker = "platform_python_implementation != 'PyPy' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/9f/33/c00162f49c0e2fe8064a62cb92b93e50c74a72bc370ab92f86112b33ff62/cryptography-46.0.3.tar.gz", hash = "sha256:a8b17438104fed022ce745b362294d9ce35b4c2e45c1d958ad4a4b019285f4a1", size = 749258, upload-time = "2025-10-15T23:18:31.74Z" }
 wheels = [
@@ -1189,7 +1385,7 @@ version = "13.0.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "cuda-pathfinder" },
-    { name = "pywin32", marker = "sys_platform == 'win32'" },
+    { name = "pywin32", marker = "sys_platform == 'win32' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 wheels = [
     { url = "https://files.pythonhosted.org/packages/85/b5/e90add0eb01d1ceaaae38c944c8a968090eb25dfbe3c81f5300e39c71739/cuda_bindings-13.0.1-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a06268a4226c867a7234f12ca183e186e7962a4971b53983c8de182dd62878a3", size = 11929946, upload-time = "2025-08-18T15:29:36.485Z" },
@@ -1243,10 +1439,10 @@ version = "25.3.2"
 source = { git = "https://github.com/apple/ml-cross-entropy.git?rev=87a86ab#87a86aba72cfd2f0d8abecaf81c13c4528ea07d8" }
 dependencies = [
     { name = "setuptools" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
-    { name = "triton", version = "3.4.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" },
-    { name = "triton", version = "3.5.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform == 'linux'" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "triton", version = "3.5.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform == 'linux' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "triton", version = "3.5.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform != 'darwin' and sys_platform != 'linux') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 
 [[package]]
@@ -1327,13 +1523,38 @@ name = "decord"
 version = "0.6.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "numpy", marker = "(platform_machine != 'aarch64' and sys_platform != 'darwin') or sys_platform == 'win32'" },
+    { name = "numpy", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 wheels = [
     { url = "https://files.pythonhosted.org/packages/11/79/936af42edf90a7bd4e41a6cac89c913d4b47fa48a26b042d5129a9242ee3/decord-0.6.0-py3-none-manylinux2010_x86_64.whl", hash = "sha256:51997f20be8958e23b7c4061ba45d0efcd86bffd5fe81c695d0befee0d442976", size = 13602299, upload-time = "2021-06-14T21:30:55.486Z" },
     { url = "https://files.pythonhosted.org/packages/6c/be/e15b5b866da452e62635a7b27513f31cb581fa2ea9cc9b768b535d62a955/decord-0.6.0-py3-none-win_amd64.whl", hash = "sha256:02665d7c4f1193a330205a791bc128f7e108eb6ae5b67144437a02f700943bad", size = 24733380, upload-time = "2021-06-14T21:30:57.766Z" },
 ]
 
+[[package]]
+name = "decord2"
+version = "3.0.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "numpy" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5e/7b/acd54cde40c18025aaba0a2e8e076d5782beb5d4997360ed2aeb4cab22a9/decord2-3.0.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:cd7a7ad98b5ee26a19c4827e9bd2e8dc4b6afb8344f15ff308c519fd5196d949", size = 20360404, upload-time = "2025-12-18T14:39:00.977Z" },
+    { url = "https://files.pythonhosted.org/packages/70/5d/9922f076649e7dbb2c14e47ecdcac1422ead2cd858a002451665d6e0517b/decord2-3.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:fd8474e1f65f12447b1e69106f13eed805aa050be301751268018332839416cd", size = 28662589, upload-time = "2025-12-18T14:39:03.323Z" },
+    { url = "https://files.pythonhosted.org/packages/f6/6f/8d9cf20aac657cb0b31892865c524402d3117a5da37a24007c4833c52a57/decord2-3.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:96a429c725fce26fe230b29cffa9507d30b2a4b1af6c99b411b58597afa0eb72", size = 30123769, upload-time = "2025-12-18T14:39:05.933Z" },
+    { url = "https://files.pythonhosted.org/packages/83/97/7aa76800bb80d647215dcf5f471e147f26437ce70c60f01919b03b1583f1/decord2-3.0.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:81b03239fa891dd69ce3796a2095c81ab4bfc483abe2e13934999eb08c4c9e7f", size = 20360404, upload-time = "2025-12-18T14:39:08.422Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/d2/a3b28cc4d914ec2aa893639c85a082450b455b0244a33a3e42fc66255317/decord2-3.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:44fa35c687e873adf7dcdc32bd00cacb27143bca7ef8c3cdd2cfeea2fc8a4d1a", size = 28662588, upload-time = "2025-12-18T14:39:10.717Z" },
+    { url = "https://files.pythonhosted.org/packages/53/6e/7ae997c25c200efcfb5f38af58739d86aa15f9b6ac8ff0edc2ad977d30f4/decord2-3.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:331b69892d594bc1177ac2f2eda97070ba5eec51ef2814da1d39c6ebba0c1213", size = 30123767, upload-time = "2025-12-18T14:39:13.356Z" },
+    { url = "https://files.pythonhosted.org/packages/0a/06/c6771245f9b72aa3ae26d81b625eec8941ad8e4801c2e1d72d749f24867b/decord2-3.0.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d9d5af54bb7c4f02eed1c6213b18571c00e2915754438b3b0abd135c6dc03bf3", size = 20360405, upload-time = "2025-12-18T14:39:16.189Z" },
+    { url = "https://files.pythonhosted.org/packages/81/6d/4b183990e60acff7903dffae0c3ea166ea1631681281499a5cd343169dd3/decord2-3.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:d8389ccf0a330e1cf828d288c9ba85bb97fd91d1159b739133cafbfe0649ddf6", size = 28662588, upload-time = "2025-12-18T14:39:18.512Z" },
+    { url = "https://files.pythonhosted.org/packages/b6/df/da863d019943e268031bb0c4ae1e0f2c933c6f8320838833413a5755d457/decord2-3.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:dfb205d99f17d0e4151629b673002b9b221a4fc25a3ec33911246cdbdb2dd434", size = 30123771, upload-time = "2025-12-18T14:39:20.939Z" },
+    { url = "https://files.pythonhosted.org/packages/f6/76/6da5cc35421200dfb78f4686755cb2871c8aad0a1abc3408f5a63c7888b0/decord2-3.0.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:68e2c86e74bf872b8db53f4d683f7376e8f57bc1dccee9a4556c0b413e820a79", size = 20360404, upload-time = "2025-12-18T14:39:24.016Z" },
+    { url = "https://files.pythonhosted.org/packages/06/5c/9c9f14653a5a7f8caf99421f234daa1c368ae1443dd0532196e37dcff226/decord2-3.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:77d328dbd9398e893ee709e70a5941cca7b2f430b1ec1d3848cdb019b7cb0582", size = 28662585, upload-time = "2025-12-18T14:39:27.178Z" },
+    { url = "https://files.pythonhosted.org/packages/0d/0f/c5201e52cdbdba761040822c10531f39389d3414e64f4857d2ff59710999/decord2-3.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:377a8a57fdec14006efde11d78415fc6b07ea747ae8dfd3b7002e6befecf42be", size = 30123769, upload-time = "2025-12-18T14:39:29.776Z" },
+    { url = "https://files.pythonhosted.org/packages/24/ab/54fbe8885cfe7793969ac8eaacced48db5ae9e558211ff3828eaf23f3d03/decord2-3.0.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:08600cdda35ab773d4a2f1b695e918c74a66495283396c095012343449c16c61", size = 20360405, upload-time = "2025-12-18T14:39:32.114Z" },
+    { url = "https://files.pythonhosted.org/packages/ba/68/baabd7f03bc0f4e22076b43aed6823b8a8fb6effc18c19a23dc5ce2d80d6/decord2-3.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:962a1a990e0a3e50b31dbc84a6bf44ce33e26f8db0e0e50dfa90119ca114bb1a", size = 28662591, upload-time = "2025-12-18T14:39:34.537Z" },
+    { url = "https://files.pythonhosted.org/packages/de/84/4ab4b48d0d89e19c27ae259ec34ddde7718d4556daa8177594a80ee1b837/decord2-3.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:2bd0abb7c28de5dd0f4e154f71f0d46f297af5efb57061a28055d56c86cc1af9", size = 30123770, upload-time = "2025-12-18T14:39:37.328Z" },
+]
+
 [[package]]
 name = "deep-ep"
 version = "1.2.1+bfded34"
@@ -1341,8 +1562,8 @@ source = { git = "https://github.com/deepseek-ai/DeepEP.git?rev=bfded34800dfec41
 dependencies = [
     { name = "ninja" },
     { name = "packaging" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-automodel') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'darwin' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang')" },
 ]
 
 [[package]]
@@ -1352,8 +1573,8 @@ source = { git = "https://github.com/deepseek-ai/DeepGEMM.git?rev=7b6b5563b9d4c1
 dependencies = [
     { name = "ninja" },
     { name = "packaging" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 
 [[package]]
@@ -1474,7 +1695,7 @@ name = "docker"
 version = "7.1.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "pywin32", marker = "sys_platform == 'win32'" },
+    { name = "pywin32", marker = "sys_platform == 'win32' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "requests" },
     { name = "urllib3" },
 ]
@@ -1544,11 +1765,20 @@ version = "0.1.0"
 source = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.1.0#d5363b4a418128cd8111983b191c4b8869a9766b" }
 dependencies = [
     { name = "absl-py" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "typing-extensions" },
 ]
 
+[[package]]
+name = "execnet"
+version = "2.1.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/bf/89/780e11f9588d9e7128a3f87788354c7946a9cbb1401ad38a48c4db9a4f07/execnet-2.1.2.tar.gz", hash = "sha256:63d83bfdd9a23e35b9c6a3261412324f964c2ec8dcd8d3c6916ee9373e0befcd", size = 166622, upload-time = "2025-11-12T09:56:37.75Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ab/84/02fc1827e8cdded4aa65baef11296a9bbe595c474f0d6d758af082d849fd/execnet-2.1.2-py3-none-any.whl", hash = "sha256:67fba928dd5a544b783f6056f449e5e3931a5c378b128bc18501f7ea79e296ec", size = 40708, upload-time = "2025-11-12T09:56:36.333Z" },
+]
+
 [[package]]
 name = "executing"
 version = "2.2.1"
@@ -1576,11 +1806,11 @@ wheels = [
 [package.optional-dependencies]
 standard = [
     { name = "email-validator" },
-    { name = "fastapi-cli", extra = ["standard"] },
+    { name = "fastapi-cli", extra = ["standard"], marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-fsdp' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-vllm'" },
     { name = "httpx" },
     { name = "jinja2" },
     { name = "python-multipart" },
-    { name = "uvicorn", extra = ["standard"] },
+    { name = "uvicorn", extra = ["standard"], marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-fsdp' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-vllm'" },
 ]
 
 [[package]]
@@ -1590,7 +1820,7 @@ source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "rich-toolkit" },
     { name = "typer" },
-    { name = "uvicorn", extra = ["standard"] },
+    { name = "uvicorn", extra = ["standard"], marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-fsdp' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-vllm'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/c6/94/3ef75d9c7c32936ecb539b9750ccbdc3d2568efd73b1cb913278375f4533/fastapi_cli-0.0.8.tar.gz", hash = "sha256:2360f2989b1ab4a3d7fc8b3a0b20e8288680d8af2e31de7c38309934d7f8a0ee", size = 16884, upload-time = "2025-07-07T14:44:09.326Z" }
 wheels = [
@@ -1600,7 +1830,7 @@ wheels = [
 [package.optional-dependencies]
 standard = [
     { name = "fastapi-cloud-cli" },
-    { name = "uvicorn", extra = ["standard"] },
+    { name = "uvicorn", extra = ["standard"], marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-fsdp' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-vllm'" },
 ]
 
 [[package]]
@@ -1609,12 +1839,12 @@ version = "0.1.5"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "httpx" },
-    { name = "pydantic", extra = ["email"] },
+    { name = "pydantic", extra = ["email"], marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-fsdp' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-vllm'" },
     { name = "rich-toolkit" },
     { name = "rignore" },
     { name = "sentry-sdk" },
     { name = "typer" },
-    { name = "uvicorn", extra = ["standard"] },
+    { name = "uvicorn", extra = ["standard"], marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-fsdp' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-vllm'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/a9/2e/3b6e5016affc310e5109bc580f760586eabecea0c8a7ab067611cd849ac0/fastapi_cloud_cli-0.1.5.tar.gz", hash = "sha256:341ee585eb731a6d3c3656cb91ad38e5f39809bf1a16d41de1333e38635a7937", size = 22710, upload-time = "2025-07-28T13:30:48.216Z" }
 wheels = [
@@ -1627,11 +1857,13 @@ version = "0.8.3"
 source = { registry = "https://pypi.org/simple" }
 sdist = { url = "https://files.pythonhosted.org/packages/73/b1/1c3d635d955f2b4bf34d45abf8f35492e04dbd7804e94ce65d9f928ef3ec/fastrlock-0.8.3.tar.gz", hash = "sha256:4af6734d92eaa3ab4373e6c9a1dd0d5ad1304e172b1521733c6c3b3d73c8fa5d", size = 79327, upload-time = "2024-12-17T11:03:39.638Z" }
 wheels = [
+    { url = "https://files.pythonhosted.org/packages/00/df/56270f2e10c1428855c990e7a7e5baafa9e1262b8e789200bd1d047eb501/fastrlock-0.8.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:8cb2cf04352ea8575d496f31b3b88c42c7976e8e58cdd7d1550dfba80ca039da", size = 55727, upload-time = "2024-12-17T11:02:17.26Z" },
     { url = "https://files.pythonhosted.org/packages/57/21/ea1511b0ef0d5457efca3bf1823effb9c5cad4fc9dca86ce08e4d65330ce/fastrlock-0.8.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:85a49a1f1e020097d087e1963e42cea6f307897d5ebe2cb6daf4af47ffdd3eed", size = 52201, upload-time = "2024-12-17T11:02:19.512Z" },
     { url = "https://files.pythonhosted.org/packages/80/07/cdecb7aa976f34328372f1c4efd6c9dc1b039b3cc8d3f38787d640009a25/fastrlock-0.8.3-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5f13ec08f1adb1aa916c384b05ecb7dbebb8df9ea81abd045f60941c6283a670", size = 53924, upload-time = "2024-12-17T11:02:20.85Z" },
     { url = "https://files.pythonhosted.org/packages/88/6d/59c497f8db9a125066dd3a7442fab6aecbe90d6fec344c54645eaf311666/fastrlock-0.8.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0ea4e53a04980d646def0f5e4b5e8bd8c7884288464acab0b37ca0c65c482bfe", size = 52140, upload-time = "2024-12-17T11:02:22.263Z" },
     { url = "https://files.pythonhosted.org/packages/62/04/9138943c2ee803d62a48a3c17b69de2f6fa27677a6896c300369e839a550/fastrlock-0.8.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:38340f6635bd4ee2a4fb02a3a725759fe921f2ca846cb9ca44531ba739cc17b4", size = 53261, upload-time = "2024-12-17T11:02:24.418Z" },
     { url = "https://files.pythonhosted.org/packages/e2/4b/db35a52589764c7745a613b6943bbd018f128d42177ab92ee7dde88444f6/fastrlock-0.8.3-cp312-cp312-win_amd64.whl", hash = "sha256:da06d43e1625e2ffddd303edcd6d2cd068e1c486f5fd0102b3f079c44eb13e2c", size = 31235, upload-time = "2024-12-17T11:02:25.708Z" },
+    { url = "https://files.pythonhosted.org/packages/92/74/7b13d836c3f221cff69d6f418f46c2a30c4b1fe09a8ce7db02eecb593185/fastrlock-0.8.3-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:5264088185ca8e6bc83181dff521eee94d078c269c7d557cc8d9ed5952b7be45", size = 54157, upload-time = "2024-12-17T11:02:29.196Z" },
     { url = "https://files.pythonhosted.org/packages/06/77/f06a907f9a07d26d0cca24a4385944cfe70d549a2c9f1c3e3217332f4f12/fastrlock-0.8.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a98ba46b3e14927550c4baa36b752d0d2f7387b8534864a8767f83cce75c160", size = 50954, upload-time = "2024-12-17T11:02:32.12Z" },
     { url = "https://files.pythonhosted.org/packages/f9/4e/94480fb3fd93991dd6f4e658b77698edc343f57caa2870d77b38c89c2e3b/fastrlock-0.8.3-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dbdea6deeccea1917c6017d353987231c4e46c93d5338ca3e66d6cd88fbce259", size = 52535, upload-time = "2024-12-17T11:02:33.402Z" },
     { url = "https://files.pythonhosted.org/packages/7d/a7/ee82bb55b6c0ca30286dac1e19ee9417a17d2d1de3b13bb0f20cefb86086/fastrlock-0.8.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c6e5bfecbc0d72ff07e43fed81671747914d6794e0926700677ed26d894d4f4f", size = 50942, upload-time = "2024-12-17T11:02:34.688Z" },
@@ -1672,8 +1904,8 @@ version = "0.3.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "einops" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/67/c6/10a1149b07e6bab45b2cb2d07f6b827716c2baf5f3404161753f25c6389b/fla_core-0.3.2.tar.gz", hash = "sha256:d38db16bc4e1c6fa8c04df442f246da1e6926a209426bc6ef703d41bfbc37c92", size = 296725, upload-time = "2025-09-10T07:43:40.155Z" }
 wheels = [
@@ -1689,8 +1921,8 @@ dependencies = [
     { name = "ninja" },
     { name = "psutil" },
     { name = "setuptools" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/e8/6d/7066d160bdffa2f9da29a8c3957f266b17a03ca0b3bdc8fdae86d9881fe7/flash_attn-2.8.1.tar.gz", hash = "sha256:0ff003899fcb244f357905b04f622d5c9736887126dd6675f8f4bc52954e3923", size = 8166563, upload-time = "2025-07-10T05:16:39.729Z" }
 
@@ -1709,31 +1941,87 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/a0/d0/35ce9eac5f52c72005095aaa12a393d2656ed7ffedf925b2381a6b76d10c/flash_linear_attention-0.3.2-py3-none-any.whl", hash = "sha256:604e73361437ba786420ab195e2caa3fd19280503761e703fa353c5ce5c65376", size = 274592, upload-time = "2025-09-10T07:43:39.107Z" },
 ]
 
+[[package]]
+name = "flashinfer-cubin"
+version = "0.5.3"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3b/81/7e3fdd9dfef8992ec2297a3a375660b45c96923da48541f8cd0c36fc6711/flashinfer_cubin-0.5.3-py3-none-any.whl", hash = "sha256:30a172ffc21856fcdcf96672ac780ce80f703e82cc1626f2c5344cf769d401a7", size = 103550617, upload-time = "2025-11-24T08:54:38.241Z" },
+]
+
 [[package]]
 name = "flashinfer-python"
 version = "0.5.2"
 source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin'",
+    "python_full_version < '3.13' and sys_platform == 'darwin'",
+]
 dependencies = [
-    { name = "apache-tvm-ffi" },
-    { name = "click" },
-    { name = "einops" },
-    { name = "ninja" },
-    { name = "numpy" },
-    { name = "nvidia-cudnn-frontend" },
-    { name = "nvidia-cutlass-dsl" },
-    { name = "nvidia-ml-py" },
-    { name = "packaging" },
-    { name = "requests" },
-    { name = "tabulate" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
-    { name = "tqdm" },
+    { name = "apache-tvm-ffi", marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-fsdp' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-vllm'" },
+    { name = "click", marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-fsdp' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-vllm'" },
+    { name = "einops", marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-fsdp' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-vllm'" },
+    { name = "ninja", marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-fsdp' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-vllm'" },
+    { name = "numpy", marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-fsdp' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-vllm'" },
+    { name = "nvidia-cudnn-frontend", marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-fsdp' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-vllm'" },
+    { name = "nvidia-cutlass-dsl", version = "4.3.1", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-fsdp' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-vllm'" },
+    { name = "nvidia-ml-py", marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-fsdp' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-vllm'" },
+    { name = "packaging", marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-fsdp' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-vllm'" },
+    { name = "requests", marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-fsdp' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-vllm'" },
+    { name = "tabulate", marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-fsdp' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-vllm'" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-automodel') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "tqdm", marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-fsdp' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-vllm'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/d8/04/e357eaa50238e12c49e66fcf47f83e066e741ef19a117c136782b32eafbb/flashinfer_python-0.5.2.tar.gz", hash = "sha256:99d097a28be1e98c7f85e4a767e9e9a4794374f9318c27db14d21e367149063f", size = 4632657, upload-time = "2025-11-07T02:53:27.261Z" }
 wheels = [
     { url = "https://files.pythonhosted.org/packages/8d/0c/4a8ffbbc0d85e314f534cf5c32711f2af5d5e6e49225a5a414400a67b684/flashinfer_python-0.5.2-py3-none-any.whl", hash = "sha256:739c27d86d5ff4e3ad1ea41dcb90bda08e44c332549bf696f9c9c5c57f608e63", size = 6936306, upload-time = "2025-11-07T02:53:25.515Z" },
 ]
 
+[[package]]
+name = "flashinfer-python"
+version = "0.5.3"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin'",
+    "python_full_version < '3.13' and sys_platform == 'darwin'",
+]
+dependencies = [
+    { name = "apache-tvm-ffi", marker = "extra == 'extra-7-nemo-rl-sglang' or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm')" },
+    { name = "click", marker = "extra == 'extra-7-nemo-rl-sglang' or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm')" },
+    { name = "einops", marker = "extra == 'extra-7-nemo-rl-sglang' or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm')" },
+    { name = "ninja", marker = "extra == 'extra-7-nemo-rl-sglang' or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm')" },
+    { name = "numpy", marker = "extra == 'extra-7-nemo-rl-sglang' or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm')" },
+    { name = "nvidia-cudnn-frontend", marker = "extra == 'extra-7-nemo-rl-sglang' or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm')" },
+    { name = "nvidia-cutlass-dsl", version = "4.2.1", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-sglang'" },
+    { name = "nvidia-cutlass-dsl", version = "4.3.1", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')" },
+    { name = "nvidia-ml-py", marker = "extra == 'extra-7-nemo-rl-sglang' or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm')" },
+    { name = "packaging", marker = "extra == 'extra-7-nemo-rl-sglang' or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm')" },
+    { name = "requests", marker = "extra == 'extra-7-nemo-rl-sglang' or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm')" },
+    { name = "tabulate", marker = "extra == 'extra-7-nemo-rl-sglang' or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "tqdm", marker = "extra == 'extra-7-nemo-rl-sglang' or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b4/91/cca69baeff24bb3efd12c7479a026432c8717ee47193694010494c528b22/flashinfer_python-0.5.3.tar.gz", hash = "sha256:100d59b0ede47878d2808cd3a1b9039d7a952d66338bc9f68dac192ae1b2e3f1", size = 4682367, upload-time = "2025-11-20T21:22:46.976Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/76/78/6dc7e7da8cb87c9965644ea0d2439457a1bc9256c45ceda0044595be4143/flashinfer_python-0.5.3-py3-none-any.whl", hash = "sha256:b601293b72f9138bad173edc28df84b9f239a013be974e2e79d4ba98aeb38cf5", size = 6998069, upload-time = "2025-11-20T21:22:45.104Z" },
+]
+
 [[package]]
 name = "flask"
 version = "3.1.2"
@@ -1978,7 +2266,7 @@ source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "aiofiles" },
     { name = "anyio" },
-    { name = "audioop-lts", marker = "python_full_version >= '3.13'" },
+    { name = "audioop-lts", marker = "python_full_version >= '3.13' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "brotli" },
     { name = "fastapi" },
     { name = "ffmpy" },
@@ -2160,30 +2448,112 @@ wheels = [
 
 [[package]]
 name = "grpcio"
-version = "1.74.0"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/38/b4/35feb8f7cab7239c5b94bd2db71abb3d6adb5f335ad8f131abb6060840b6/grpcio-1.74.0.tar.gz", hash = "sha256:80d1f4fbb35b0742d3e3d3bb654b7381cd5f015f8497279a1e9c21ba623e01b1", size = 12756048, upload-time = "2025-07-24T18:54:23.039Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/4c/5d/e504d5d5c4469823504f65687d6c8fb97b7f7bf0b34873b7598f1df24630/grpcio-1.74.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:8533e6e9c5bd630ca98062e3a1326249e6ada07d05acf191a77bc33f8948f3d8", size = 5445551, upload-time = "2025-07-24T18:53:23.641Z" },
-    { url = "https://files.pythonhosted.org/packages/43/01/730e37056f96f2f6ce9f17999af1556df62ee8dab7fa48bceeaab5fd3008/grpcio-1.74.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:2918948864fec2a11721d91568effffbe0a02b23ecd57f281391d986847982f6", size = 10979810, upload-time = "2025-07-24T18:53:25.349Z" },
-    { url = "https://files.pythonhosted.org/packages/79/3d/09fd100473ea5c47083889ca47ffd356576173ec134312f6aa0e13111dee/grpcio-1.74.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:60d2d48b0580e70d2e1954d0d19fa3c2e60dd7cbed826aca104fff518310d1c5", size = 5941946, upload-time = "2025-07-24T18:53:27.387Z" },
-    { url = "https://files.pythonhosted.org/packages/8a/99/12d2cca0a63c874c6d3d195629dcd85cdf5d6f98a30d8db44271f8a97b93/grpcio-1.74.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3601274bc0523f6dc07666c0e01682c94472402ac2fd1226fd96e079863bfa49", size = 6621763, upload-time = "2025-07-24T18:53:29.193Z" },
-    { url = "https://files.pythonhosted.org/packages/9d/2c/930b0e7a2f1029bbc193443c7bc4dc2a46fedb0203c8793dcd97081f1520/grpcio-1.74.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:176d60a5168d7948539def20b2a3adcce67d72454d9ae05969a2e73f3a0feee7", size = 6180664, upload-time = "2025-07-24T18:53:30.823Z" },
-    { url = "https://files.pythonhosted.org/packages/db/d5/ff8a2442180ad0867717e670f5ec42bfd8d38b92158ad6bcd864e6d4b1ed/grpcio-1.74.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e759f9e8bc908aaae0412642afe5416c9f983a80499448fcc7fab8692ae044c3", size = 6301083, upload-time = "2025-07-24T18:53:32.454Z" },
-    { url = "https://files.pythonhosted.org/packages/b0/ba/b361d390451a37ca118e4ec7dccec690422e05bc85fba2ec72b06cefec9f/grpcio-1.74.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:9e7c4389771855a92934b2846bd807fc25a3dfa820fd912fe6bd8136026b2707", size = 6994132, upload-time = "2025-07-24T18:53:34.506Z" },
-    { url = "https://files.pythonhosted.org/packages/3b/0c/3a5fa47d2437a44ced74141795ac0251bbddeae74bf81df3447edd767d27/grpcio-1.74.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:cce634b10aeab37010449124814b05a62fb5f18928ca878f1bf4750d1f0c815b", size = 6489616, upload-time = "2025-07-24T18:53:36.217Z" },
-    { url = "https://files.pythonhosted.org/packages/ae/95/ab64703b436d99dc5217228babc76047d60e9ad14df129e307b5fec81fd0/grpcio-1.74.0-cp312-cp312-win32.whl", hash = "sha256:885912559974df35d92219e2dc98f51a16a48395f37b92865ad45186f294096c", size = 3807083, upload-time = "2025-07-24T18:53:37.911Z" },
-    { url = "https://files.pythonhosted.org/packages/84/59/900aa2445891fc47a33f7d2f76e00ca5d6ae6584b20d19af9c06fa09bf9a/grpcio-1.74.0-cp312-cp312-win_amd64.whl", hash = "sha256:42f8fee287427b94be63d916c90399ed310ed10aadbf9e2e5538b3e497d269bc", size = 4490123, upload-time = "2025-07-24T18:53:39.528Z" },
-    { url = "https://files.pythonhosted.org/packages/d4/d8/1004a5f468715221450e66b051c839c2ce9a985aa3ee427422061fcbb6aa/grpcio-1.74.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:2bc2d7d8d184e2362b53905cb1708c84cb16354771c04b490485fa07ce3a1d89", size = 5449488, upload-time = "2025-07-24T18:53:41.174Z" },
-    { url = "https://files.pythonhosted.org/packages/94/0e/33731a03f63740d7743dced423846c831d8e6da808fcd02821a4416df7fa/grpcio-1.74.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:c14e803037e572c177ba54a3e090d6eb12efd795d49327c5ee2b3bddb836bf01", size = 10974059, upload-time = "2025-07-24T18:53:43.066Z" },
-    { url = "https://files.pythonhosted.org/packages/0d/c6/3d2c14d87771a421205bdca991467cfe473ee4c6a1231c1ede5248c62ab8/grpcio-1.74.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:f6ec94f0e50eb8fa1744a731088b966427575e40c2944a980049798b127a687e", size = 5945647, upload-time = "2025-07-24T18:53:45.269Z" },
-    { url = "https://files.pythonhosted.org/packages/c5/83/5a354c8aaff58594eef7fffebae41a0f8995a6258bbc6809b800c33d4c13/grpcio-1.74.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:566b9395b90cc3d0d0c6404bc8572c7c18786ede549cdb540ae27b58afe0fb91", size = 6626101, upload-time = "2025-07-24T18:53:47.015Z" },
-    { url = "https://files.pythonhosted.org/packages/3f/ca/4fdc7bf59bf6994aa45cbd4ef1055cd65e2884de6113dbd49f75498ddb08/grpcio-1.74.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1ea6176d7dfd5b941ea01c2ec34de9531ba494d541fe2057c904e601879f249", size = 6182562, upload-time = "2025-07-24T18:53:48.967Z" },
-    { url = "https://files.pythonhosted.org/packages/fd/48/2869e5b2c1922583686f7ae674937986807c2f676d08be70d0a541316270/grpcio-1.74.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:64229c1e9cea079420527fa8ac45d80fc1e8d3f94deaa35643c381fa8d98f362", size = 6303425, upload-time = "2025-07-24T18:53:50.847Z" },
-    { url = "https://files.pythonhosted.org/packages/a6/0e/bac93147b9a164f759497bc6913e74af1cb632c733c7af62c0336782bd38/grpcio-1.74.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:0f87bddd6e27fc776aacf7ebfec367b6d49cad0455123951e4488ea99d9b9b8f", size = 6996533, upload-time = "2025-07-24T18:53:52.747Z" },
-    { url = "https://files.pythonhosted.org/packages/84/35/9f6b2503c1fd86d068b46818bbd7329db26a87cdd8c01e0d1a9abea1104c/grpcio-1.74.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:3b03d8f2a07f0fea8c8f74deb59f8352b770e3900d143b3d1475effcb08eec20", size = 6491489, upload-time = "2025-07-24T18:53:55.06Z" },
-    { url = "https://files.pythonhosted.org/packages/75/33/a04e99be2a82c4cbc4039eb3a76f6c3632932b9d5d295221389d10ac9ca7/grpcio-1.74.0-cp313-cp313-win32.whl", hash = "sha256:b6a73b2ba83e663b2480a90b82fdae6a7aa6427f62bf43b29912c0cfd1aa2bfa", size = 3805811, upload-time = "2025-07-24T18:53:56.798Z" },
-    { url = "https://files.pythonhosted.org/packages/34/80/de3eb55eb581815342d097214bed4c59e806b05f1b3110df03b2280d6dfd/grpcio-1.74.0-cp313-cp313-win_amd64.whl", hash = "sha256:fd3c71aeee838299c5887230b8a1822795325ddfea635edd82954c1eaa831e24", size = 4489214, upload-time = "2025-07-24T18:53:59.771Z" },
+version = "1.75.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/9d/f7/8963848164c7604efb3a3e6ee457fdb3a469653e19002bd24742473254f8/grpcio-1.75.1.tar.gz", hash = "sha256:3e81d89ece99b9ace23a6916880baca613c03a799925afb2857887efa8b1b3d2", size = 12731327, upload-time = "2025-09-26T09:03:36.887Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3a/81/42be79e73a50aaa20af66731c2defeb0e8c9008d9935a64dd8ea8e8c44eb/grpcio-1.75.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:7b888b33cd14085d86176b1628ad2fcbff94cfbbe7809465097aa0132e58b018", size = 5668314, upload-time = "2025-09-26T09:01:55.424Z" },
+    { url = "https://files.pythonhosted.org/packages/c5/a7/3686ed15822fedc58c22f82b3a7403d9faf38d7c33de46d4de6f06e49426/grpcio-1.75.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:8775036efe4ad2085975531d221535329f5dac99b6c2a854a995456098f99546", size = 11476125, upload-time = "2025-09-26T09:01:57.927Z" },
+    { url = "https://files.pythonhosted.org/packages/14/85/21c71d674f03345ab183c634ecd889d3330177e27baea8d5d247a89b6442/grpcio-1.75.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bb658f703468d7fbb5dcc4037c65391b7dc34f808ac46ed9136c24fc5eeb041d", size = 6246335, upload-time = "2025-09-26T09:02:00.76Z" },
+    { url = "https://files.pythonhosted.org/packages/fd/db/3beb661bc56a385ae4fa6b0e70f6b91ac99d47afb726fe76aaff87ebb116/grpcio-1.75.1-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:4b7177a1cdb3c51b02b0c0a256b0a72fdab719600a693e0e9037949efffb200b", size = 6916309, upload-time = "2025-09-26T09:02:02.894Z" },
+    { url = "https://files.pythonhosted.org/packages/1e/9c/eda9fe57f2b84343d44c1b66cf3831c973ba29b078b16a27d4587a1fdd47/grpcio-1.75.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7d4fa6ccc3ec2e68a04f7b883d354d7fea22a34c44ce535a2f0c0049cf626ddf", size = 6435419, upload-time = "2025-09-26T09:02:05.055Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/b8/090c98983e0a9d602e3f919a6e2d4e470a8b489452905f9a0fa472cac059/grpcio-1.75.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3d86880ecaeb5b2f0a8afa63824de93adb8ebe4e49d0e51442532f4e08add7d6", size = 7064893, upload-time = "2025-09-26T09:02:07.275Z" },
+    { url = "https://files.pythonhosted.org/packages/ec/c0/6d53d4dbbd00f8bd81571f5478d8a95528b716e0eddb4217cc7cb45aae5f/grpcio-1.75.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a8041d2f9e8a742aeae96f4b047ee44e73619f4f9d24565e84d5446c623673b6", size = 8011922, upload-time = "2025-09-26T09:02:09.527Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/7c/48455b2d0c5949678d6982c3e31ea4d89df4e16131b03f7d5c590811cbe9/grpcio-1.75.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3652516048bf4c314ce12be37423c79829f46efffb390ad64149a10c6071e8de", size = 7466181, upload-time = "2025-09-26T09:02:12.279Z" },
+    { url = "https://files.pythonhosted.org/packages/fd/12/04a0e79081e3170b6124f8cba9b6275871276be06c156ef981033f691880/grpcio-1.75.1-cp312-cp312-win32.whl", hash = "sha256:44b62345d8403975513af88da2f3d5cc76f73ca538ba46596f92a127c2aea945", size = 3938543, upload-time = "2025-09-26T09:02:14.77Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/d7/11350d9d7fb5adc73d2b0ebf6ac1cc70135577701e607407fe6739a90021/grpcio-1.75.1-cp312-cp312-win_amd64.whl", hash = "sha256:b1e191c5c465fa777d4cafbaacf0c01e0d5278022082c0abbd2ee1d6454ed94d", size = 4641938, upload-time = "2025-09-26T09:02:16.927Z" },
+    { url = "https://files.pythonhosted.org/packages/46/74/bac4ab9f7722164afdf263ae31ba97b8174c667153510322a5eba4194c32/grpcio-1.75.1-cp313-cp313-linux_armv7l.whl", hash = "sha256:3bed22e750d91d53d9e31e0af35a7b0b51367e974e14a4ff229db5b207647884", size = 5672779, upload-time = "2025-09-26T09:02:19.11Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/52/d0483cfa667cddaa294e3ab88fd2c2a6e9dc1a1928c0e5911e2e54bd5b50/grpcio-1.75.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:5b8f381eadcd6ecaa143a21e9e80a26424c76a0a9b3d546febe6648f3a36a5ac", size = 11470623, upload-time = "2025-09-26T09:02:22.117Z" },
+    { url = "https://files.pythonhosted.org/packages/cf/e4/d1954dce2972e32384db6a30273275e8c8ea5a44b80347f9055589333b3f/grpcio-1.75.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5bf4001d3293e3414d0cf99ff9b1139106e57c3a66dfff0c5f60b2a6286ec133", size = 6248838, upload-time = "2025-09-26T09:02:26.426Z" },
+    { url = "https://files.pythonhosted.org/packages/06/43/073363bf63826ba8077c335d797a8d026f129dc0912b69c42feaf8f0cd26/grpcio-1.75.1-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:9f82ff474103e26351dacfe8d50214e7c9322960d8d07ba7fa1d05ff981c8b2d", size = 6922663, upload-time = "2025-09-26T09:02:28.724Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/6f/076ac0df6c359117676cacfa8a377e2abcecec6a6599a15a672d331f6680/grpcio-1.75.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0ee119f4f88d9f75414217823d21d75bfe0e6ed40135b0cbbfc6376bc9f7757d", size = 6436149, upload-time = "2025-09-26T09:02:30.971Z" },
+    { url = "https://files.pythonhosted.org/packages/6b/27/1d08824f1d573fcb1fa35ede40d6020e68a04391709939e1c6f4193b445f/grpcio-1.75.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:664eecc3abe6d916fa6cf8dd6b778e62fb264a70f3430a3180995bf2da935446", size = 7067989, upload-time = "2025-09-26T09:02:33.233Z" },
+    { url = "https://files.pythonhosted.org/packages/c6/98/98594cf97b8713feb06a8cb04eeef60b4757e3e2fb91aa0d9161da769843/grpcio-1.75.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:c32193fa08b2fbebf08fe08e84f8a0aad32d87c3ad42999c65e9449871b1c66e", size = 8010717, upload-time = "2025-09-26T09:02:36.011Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/7e/bb80b1bba03c12158f9254762cdf5cced4a9bc2e8ed51ed335915a5a06ef/grpcio-1.75.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5cebe13088b9254f6e615bcf1da9131d46cfa4e88039454aca9cb65f639bd3bc", size = 7463822, upload-time = "2025-09-26T09:02:38.26Z" },
+    { url = "https://files.pythonhosted.org/packages/23/1c/1ea57fdc06927eb5640f6750c697f596f26183573069189eeaf6ef86ba2d/grpcio-1.75.1-cp313-cp313-win32.whl", hash = "sha256:4b4c678e7ed50f8ae8b8dbad15a865ee73ce12668b6aaf411bf3258b5bc3f970", size = 3938490, upload-time = "2025-09-26T09:02:40.268Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/24/fbb8ff1ccadfbf78ad2401c41aceaf02b0d782c084530d8871ddd69a2d49/grpcio-1.75.1-cp313-cp313-win_amd64.whl", hash = "sha256:5573f51e3f296a1bcf71e7a690c092845fb223072120f4bdb7a5b48e111def66", size = 4642538, upload-time = "2025-09-26T09:02:42.519Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/1b/9a0a5cecd24302b9fdbcd55d15ed6267e5f3d5b898ff9ac8cbe17ee76129/grpcio-1.75.1-cp314-cp314-linux_armv7l.whl", hash = "sha256:c05da79068dd96723793bffc8d0e64c45f316248417515f28d22204d9dae51c7", size = 5673319, upload-time = "2025-09-26T09:02:44.742Z" },
+    { url = "https://files.pythonhosted.org/packages/c6/ec/9d6959429a83fbf5df8549c591a8a52bb313976f6646b79852c4884e3225/grpcio-1.75.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:06373a94fd16ec287116a825161dca179a0402d0c60674ceeec8c9fba344fe66", size = 11480347, upload-time = "2025-09-26T09:02:47.539Z" },
+    { url = "https://files.pythonhosted.org/packages/09/7a/26da709e42c4565c3d7bf999a9569da96243ce34a8271a968dee810a7cf1/grpcio-1.75.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4484f4b7287bdaa7a5b3980f3c7224c3c622669405d20f69549f5fb956ad0421", size = 6254706, upload-time = "2025-09-26T09:02:50.4Z" },
+    { url = "https://files.pythonhosted.org/packages/f1/08/dcb26a319d3725f199c97e671d904d84ee5680de57d74c566a991cfab632/grpcio-1.75.1-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:2720c239c1180eee69f7883c1d4c83fc1a495a2535b5fa322887c70bf02b16e8", size = 6922501, upload-time = "2025-09-26T09:02:52.711Z" },
+    { url = "https://files.pythonhosted.org/packages/78/66/044d412c98408a5e23cb348845979a2d17a2e2b6c3c34c1ec91b920f49d0/grpcio-1.75.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:07a554fa31c668cf0e7a188678ceeca3cb8fead29bbe455352e712ec33ca701c", size = 6437492, upload-time = "2025-09-26T09:02:55.542Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/9d/5e3e362815152aa1afd8b26ea613effa005962f9da0eec6e0e4527e7a7d1/grpcio-1.75.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:3e71a2105210366bfc398eef7f57a664df99194f3520edb88b9c3a7e46ee0d64", size = 7081061, upload-time = "2025-09-26T09:02:58.261Z" },
+    { url = "https://files.pythonhosted.org/packages/1e/1a/46615682a19e100f46e31ddba9ebc297c5a5ab9ddb47b35443ffadb8776c/grpcio-1.75.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:8679aa8a5b67976776d3c6b0521e99d1c34db8a312a12bcfd78a7085cb9b604e", size = 8010849, upload-time = "2025-09-26T09:03:00.548Z" },
+    { url = "https://files.pythonhosted.org/packages/67/8e/3204b94ac30b0f675ab1c06540ab5578660dc8b690db71854d3116f20d00/grpcio-1.75.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:aad1c774f4ebf0696a7f148a56d39a3432550612597331792528895258966dc0", size = 7464478, upload-time = "2025-09-26T09:03:03.096Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/97/2d90652b213863b2cf466d9c1260ca7e7b67a16780431b3eb1d0420e3d5b/grpcio-1.75.1-cp314-cp314-win32.whl", hash = "sha256:62ce42d9994446b307649cb2a23335fa8e927f7ab2cbf5fcb844d6acb4d85f9c", size = 4012672, upload-time = "2025-09-26T09:03:05.477Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/df/e2e6e9fc1c985cd1a59e6996a05647c720fe8a03b92f5ec2d60d366c531e/grpcio-1.75.1-cp314-cp314-win_amd64.whl", hash = "sha256:f86e92275710bea3000cb79feca1762dc0ad3b27830dd1a74e82ab321d4ee464", size = 4772475, upload-time = "2025-09-26T09:03:07.661Z" },
+]
+
+[[package]]
+name = "grpcio-health-checking"
+version = "1.75.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "grpcio" },
+    { name = "protobuf" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/f9/3d/ed141f8b19b40f41b7fe5432c1ecb10c54ef002e46466cd8450f9ef621f7/grpcio_health_checking-1.75.1.tar.gz", hash = "sha256:888ea1b86ad65c02c8547486e95263562e145363e3d5400f5244f7f2c5323e63", size = 16766, upload-time = "2025-09-26T09:13:17.171Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/af/44/37245c53f61a66002a6a451ba63be080c100b7c1dfc54ae2af4403452bd9/grpcio_health_checking-1.75.1-py3-none-any.whl", hash = "sha256:f9d3eae78c13bfe81105a6433fbf7c4ad04ea1f517e9110fde35391d56ec760e", size = 18921, upload-time = "2025-09-26T09:12:37.076Z" },
+]
+
+[[package]]
+name = "grpcio-reflection"
+version = "1.75.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "grpcio" },
+    { name = "protobuf" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/49/a3/95a7a03fcd44f6dedb8196aa98ef71983b86b4d465d181b17def85983449/grpcio_reflection-1.75.1.tar.gz", hash = "sha256:2be3f20b7b93e6e691a0bc761fd7e9996a940b4c96c68f6ca4f7fbc47c3f4b64", size = 18858, upload-time = "2025-09-26T09:13:21.706Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/1f/8e/0e78fa08735c9759367da60e863e64923ab4c93f2976a8b51fd0a5adb526/grpcio_reflection-1.75.1-py3-none-any.whl", hash = "sha256:17ef1504c9efd58662e56090379885e5f3f7985ce481cf30d6b1cb25f55ab0ae", size = 22697, upload-time = "2025-09-26T09:12:25.932Z" },
+]
+
+[[package]]
+name = "grpcio-tools"
+version = "1.75.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "grpcio" },
+    { name = "protobuf" },
+    { name = "setuptools" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/7d/76/0cd2a2bb379275c319544a3ab613dc3cea7a167503908c1b4de55f82bd9e/grpcio_tools-1.75.1.tar.gz", hash = "sha256:bb78960cf3d58941e1fec70cbdaccf255918beed13c34112a6915a6d8facebd1", size = 5390470, upload-time = "2025-09-26T09:10:11.948Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ef/a7/581bb204d19a347303ed5e25b19f7d8c6365a28c242fca013d1d6d78ad7e/grpcio_tools-1.75.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:49b68936cf212052eeafa50b824e17731b78d15016b235d36e0d32199000b14c", size = 2546099, upload-time = "2025-09-26T09:08:28.794Z" },
+    { url = "https://files.pythonhosted.org/packages/9f/59/ab65998eba14ff9d292c880f6a276fe7d0571bba3bb4ddf66aca1f8438b5/grpcio_tools-1.75.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:08cb6e568e58b76a2178ad3b453845ff057131fff00f634d7e15dcd015cd455b", size = 5839838, upload-time = "2025-09-26T09:08:31.038Z" },
+    { url = "https://files.pythonhosted.org/packages/7e/65/7027f71069b4c1e8c7b46de8c46c297c9d28ef6ed4ea0161e8c82c75d1d0/grpcio_tools-1.75.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:168402ad29a249092673079cf46266936ec2fb18d4f854d96e9c5fa5708efa39", size = 2592916, upload-time = "2025-09-26T09:08:33.216Z" },
+    { url = "https://files.pythonhosted.org/packages/0f/84/1abfb3c679b78c7fca7524031cf9de4c4c509c441b48fd26291ac16dd1af/grpcio_tools-1.75.1-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:bbae11c29fcf450730f021bfc14b12279f2f985e2e493ccc2f133108728261db", size = 2905276, upload-time = "2025-09-26T09:08:35.691Z" },
+    { url = "https://files.pythonhosted.org/packages/99/cd/7f9e05f1eddccb61bc0ead1e49eb2222441957b02ed11acfcd2f795b03a8/grpcio_tools-1.75.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:38c6c7d5d4800f636ee691cd073db1606d1a6a76424ca75c9b709436c9c20439", size = 2656424, upload-time = "2025-09-26T09:08:38.255Z" },
+    { url = "https://files.pythonhosted.org/packages/29/1d/8b7852771c2467728341f7b9c3ca4ebc76e4e23485c6a3e6d97a8323ad2a/grpcio_tools-1.75.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:626f6a61a8f141dde9a657775854d1c0d99509f9a2762b82aa401a635f6ec73d", size = 3108985, upload-time = "2025-09-26T09:08:40.291Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/6a/069da89cdf2e97e4558bfceef5b60bf0ef200c443b465e7691869006dd32/grpcio_tools-1.75.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:f61a8334ae38d4f98c744a732b89527e5af339d17180e25fff0676060f8709b7", size = 3657940, upload-time = "2025-09-26T09:08:42.437Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/e4/ca8dae800c084beb89e2720346f70012d36dfb9df02d8eacd518c06cf4a0/grpcio_tools-1.75.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bd0c3fb40d89a1e24a41974e77c7331e80396ab7cde39bc396a13d6b5e2a750b", size = 3324878, upload-time = "2025-09-26T09:08:45.083Z" },
+    { url = "https://files.pythonhosted.org/packages/58/06/cbe923679309bf970923f4a11351ea9e485291b504d7243130fdcfdcb03f/grpcio_tools-1.75.1-cp312-cp312-win32.whl", hash = "sha256:004bc5327593eea48abd03be3188e757c3ca0039079587a6aac24275127cac20", size = 993071, upload-time = "2025-09-26T09:08:46.785Z" },
+    { url = "https://files.pythonhosted.org/packages/7c/0c/84d6be007262c5d88a590082f3a1fe62d4b0eeefa10c6cdb3548f3663e80/grpcio_tools-1.75.1-cp312-cp312-win_amd64.whl", hash = "sha256:23952692160b5fe7900653dfdc9858dc78c2c42e15c27e19ee780c8917ba6028", size = 1157506, upload-time = "2025-09-26T09:08:48.844Z" },
+    { url = "https://files.pythonhosted.org/packages/47/fa/624bbe1b2ccf4f6044bf3cd314fe2c35f78f702fcc2191dc65519baddca4/grpcio_tools-1.75.1-cp313-cp313-linux_armv7l.whl", hash = "sha256:ca9e116aab0ecf4365fc2980f2e8ae1b22273c3847328b9a8e05cbd14345b397", size = 2545752, upload-time = "2025-09-26T09:08:51.433Z" },
+    { url = "https://files.pythonhosted.org/packages/b9/4c/6d884e2337feff0a656e395338019adecc3aa1daeae9d7e8eb54340d4207/grpcio_tools-1.75.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:9fe87a926b65eb7f41f8738b6d03677cc43185ff77a9d9b201bdb2f673f3fa1e", size = 5838163, upload-time = "2025-09-26T09:08:53.858Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/2a/2ba7b6911a754719643ed92ae816a7f989af2be2882b9a9e1f90f4b0e882/grpcio_tools-1.75.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:45503a6094f91b3fd31c3d9adef26ac514f102086e2a37de797e220a6791ee87", size = 2592148, upload-time = "2025-09-26T09:08:55.86Z" },
+    { url = "https://files.pythonhosted.org/packages/88/db/fa613a45c3c7b00f905bd5ad3a93c73194724d0a2dd72adae3be32983343/grpcio_tools-1.75.1-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:b01b60b3de67be531a39fd869d7613fa8f178aff38c05e4d8bc2fc530fa58cb5", size = 2905215, upload-time = "2025-09-26T09:08:58.27Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/0c/ee4786972bb82f60e4f313bb2227c79c2cd20eb13c94c0263067923cfd12/grpcio_tools-1.75.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:09e2b9b9488735514777d44c1e4eda813122d2c87aad219f98d5d49b359a8eab", size = 2656251, upload-time = "2025-09-26T09:09:00.249Z" },
+    { url = "https://files.pythonhosted.org/packages/77/f1/cc5a50658d705d0b71ff8a4fbbfcc6279d3c95731a2ef7285e13dc40e2fe/grpcio_tools-1.75.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:55e60300e62b220fabe6f062fe69f143abaeff3335f79b22b56d86254f3c3c80", size = 3108911, upload-time = "2025-09-26T09:09:02.515Z" },
+    { url = "https://files.pythonhosted.org/packages/09/d8/43545f77c4918e778e90bc2c02b3462ac71cee14f29d85cdb69b089538eb/grpcio_tools-1.75.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:49ce00fcc6facbbf52bf376e55b8e08810cecd03dab0b3a2986d73117c6f6ee4", size = 3657021, upload-time = "2025-09-26T09:09:05.331Z" },
+    { url = "https://files.pythonhosted.org/packages/fc/0b/2ae5925374b66bc8df5b828eff1a5f9459349c83dae1773f0aa9858707e6/grpcio_tools-1.75.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:71e95479aea868f8c8014d9dc4267f26ee75388a0d8a552e1648cfa0b53d24b4", size = 3324450, upload-time = "2025-09-26T09:09:07.867Z" },
+    { url = "https://files.pythonhosted.org/packages/6e/53/9f887bacbecf892ac5b0b282477ca8cfa5b73911b04259f0d88b52e9a055/grpcio_tools-1.75.1-cp313-cp313-win32.whl", hash = "sha256:fff9d2297416eae8861e53154ccf70a19994e5935e6c8f58ebf431f81cbd8d12", size = 992434, upload-time = "2025-09-26T09:09:09.966Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/f0/9979d97002edffdc2a88e5f2e0dccea396dd4a6eab34fa2f705fe43eae2f/grpcio_tools-1.75.1-cp313-cp313-win_amd64.whl", hash = "sha256:1849ddd508143eb48791e81d42ddc924c554d1b4900e06775a927573a8d4267f", size = 1157069, upload-time = "2025-09-26T09:09:12.287Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/0b/4ff4ead293f2b016668628a240937828444094778c8037d2bbef700e9097/grpcio_tools-1.75.1-cp314-cp314-linux_armv7l.whl", hash = "sha256:f281b594489184b1f9a337cdfed1fc1ddb8428f41c4b4023de81527e90b38e1e", size = 2545868, upload-time = "2025-09-26T09:09:14.716Z" },
+    { url = "https://files.pythonhosted.org/packages/0e/78/aa6bf73a18de5357c01ef87eea92150931586b25196fa4df197a37bae11d/grpcio_tools-1.75.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:becf8332f391abc62bf4eea488b63be063d76a7cf2ef00b2e36c617d9ee9216b", size = 5838010, upload-time = "2025-09-26T09:09:20.415Z" },
+    { url = "https://files.pythonhosted.org/packages/99/65/7eaad673bc971af45e079d3b13c20d9ba9842b8788d31953e3234c2e2cee/grpcio_tools-1.75.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a08330f24e5cd7b39541882a95a8ba04ffb4df79e2984aa0cd01ed26dcdccf49", size = 2593170, upload-time = "2025-09-26T09:09:22.889Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/db/57e1e29e9186c7ed223ce8a9b609d3f861c4db015efb643dfe60b403c137/grpcio_tools-1.75.1-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:6bf3742bd8f102630072ed317d1496f31c454cd85ad19d37a68bd85bf9d5f8b9", size = 2905167, upload-time = "2025-09-26T09:09:25.96Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/7b/894f891f3cf19812192f8bbf1e0e1c958055676ecf0a5466a350730a006d/grpcio_tools-1.75.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f26028949474feb380460ce52d9d090d00023940c65236294a66c42ac5850e8b", size = 2656210, upload-time = "2025-09-26T09:09:28.786Z" },
+    { url = "https://files.pythonhosted.org/packages/99/76/8e48427da93ef243c09629969c7b5a2c59dceb674b6b623c1f5fbaa5c8c5/grpcio_tools-1.75.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1bd68fb98bf08f11b6c3210834a14eefe585bad959bdba38e78b4ae3b04ba5bd", size = 3109226, upload-time = "2025-09-26T09:09:31.307Z" },
+    { url = "https://files.pythonhosted.org/packages/b3/7e/ecf71c316c2a88c2478b7c6372d0f82d05f07edbf0f31b6da613df99ec7c/grpcio_tools-1.75.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:f1496e21586193da62c3a73cd16f9c63c5b3efd68ff06dab96dbdfefa90d40bf", size = 3657139, upload-time = "2025-09-26T09:09:35.043Z" },
+    { url = "https://files.pythonhosted.org/packages/6f/f3/b2613e81da2085f40a989c0601ec9efc11e8b32fcb71b1234b64a18af830/grpcio_tools-1.75.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:14a78b1e36310cdb3516cdf9ee2726107875e0b247e2439d62fc8dc38cf793c1", size = 3324513, upload-time = "2025-09-26T09:09:37.44Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/1f/2df4fa8634542524bc22442ffe045d41905dae62cc5dd14408b80c5ac1b8/grpcio_tools-1.75.1-cp314-cp314-win32.whl", hash = "sha256:0e6f916daf222002fb98f9a6f22de0751959e7e76a24941985cc8e43cea77b50", size = 1015283, upload-time = "2025-09-26T09:09:39.461Z" },
+    { url = "https://files.pythonhosted.org/packages/23/4f/f27c973ff50486a70be53a3978b6b0244398ca170a4e19d91988b5295d92/grpcio_tools-1.75.1-cp314-cp314-win_amd64.whl", hash = "sha256:878c3b362264588c45eba57ce088755f8b2b54893d41cc4a68cdeea62996da5c", size = 1189364, upload-time = "2025-09-26T09:09:42.036Z" },
 ]
 
 [[package]]
@@ -2191,7 +2561,7 @@ name = "gunicorn"
 version = "23.0.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "packaging", marker = "sys_platform != 'win32'" },
+    { name = "packaging" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/34/72/9614c465dc206155d93eff0ca20d42e1e35afc533971379482de953521a4/gunicorn-23.0.0.tar.gz", hash = "sha256:f014447a0101dc57e294f6c18ca6b40227a4c90e9bdb586042628030cba004ec", size = 375031, upload-time = "2024-08-10T20:25:27.378Z" }
 wheels = [
@@ -2235,6 +2605,38 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/08/e7/ae38d7a6dfba0533684e0b2136817d667588ae3ec984c1a4e5df5eb88482/hatchling-1.27.0-py3-none-any.whl", hash = "sha256:d3a2f3567c4f926ea39849cdf924c7e99e6686c9c8e288ae1037c8fa2a5d937b", size = 75794, upload-time = "2024-12-15T17:08:10.364Z" },
 ]
 
+[[package]]
+name = "hf-transfer"
+version = "0.1.9"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/1a/eb/8fc64f40388c29ce8ce3b2b180a089d4d6b25b1d0d232d016704cb852104/hf_transfer-0.1.9.tar.gz", hash = "sha256:035572865dab29d17e783fbf1e84cf1cb24f3fcf8f1b17db1cfc7fdf139f02bf", size = 25201, upload-time = "2025-01-07T10:05:12.947Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a4/78/0dce00208f585fae675f40033ef9a30dedfa83665d5ac79f16beb4a0a6c2/hf_transfer-0.1.9-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:6e94e8822da79573c9b6ae4d6b2f847c59a7a06c5327d7db20751b68538dc4f6", size = 1386084, upload-time = "2025-01-07T10:04:47.874Z" },
+    { url = "https://files.pythonhosted.org/packages/ea/2e/3d60b1a9e9f29a2152aa66c823bf5e399ae7be3fef310ff0de86779c5d2d/hf_transfer-0.1.9-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3ebc4ab9023414880c8b1d3c38174d1c9989eb5022d37e814fa91a3060123eb0", size = 1343558, upload-time = "2025-01-07T10:04:42.313Z" },
+    { url = "https://files.pythonhosted.org/packages/fb/38/130a5ac3747f104033591bcac1c961cb1faadfdc91704f59b09c0b465ff2/hf_transfer-0.1.9-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8674026f21ed369aa2a0a4b46000aca850fc44cd2b54af33a172ce5325b4fc82", size = 3726676, upload-time = "2025-01-07T10:04:11.539Z" },
+    { url = "https://files.pythonhosted.org/packages/15/a1/f4e27c5ad17aac616ae0849e2aede5aae31db8267a948c6b3eeb9fd96446/hf_transfer-0.1.9-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3a736dfbb2c84f5a2c975478ad200c0c8bfcb58a25a35db402678fb87ce17fa4", size = 3062920, upload-time = "2025-01-07T10:04:16.297Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/0d/727abdfba39bc3f1132cfa4c970588c2c0bb0d82fe2d645cc10f4e2f8e0b/hf_transfer-0.1.9-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:504b8427fd785dd8546d53b9fafe6e436bd7a3adf76b9dce556507650a7b4567", size = 3578681, upload-time = "2025-01-07T10:04:29.702Z" },
+    { url = "https://files.pythonhosted.org/packages/50/d0/2b213eb1ea8b1252ccaf1a6c804d0aba03fea38aae4124df6a3acb70511a/hf_transfer-0.1.9-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2c7fc1b85f4d0f76e452765d7648c9f4bfd0aedb9ced2ae1ebfece2d8cfaf8e2", size = 3398837, upload-time = "2025-01-07T10:04:22.778Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/8a/79dbce9006e0bd6b74516f97451a7b7c64dbbb426df15d901dd438cfeee3/hf_transfer-0.1.9-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d991376f0eac70a60f0cbc95602aa708a6f7c8617f28b4945c1431d67b8e3c8", size = 3546986, upload-time = "2025-01-07T10:04:36.415Z" },
+    { url = "https://files.pythonhosted.org/packages/a9/f7/9ac239b6ee6fe0bad130325d987a93ea58c4118e50479f0786f1733b37e8/hf_transfer-0.1.9-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e6ac4eddcd99575ed3735ed911ddf9d1697e2bd13aa3f0ad7e3904dd4863842e", size = 4071715, upload-time = "2025-01-07T10:04:53.224Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/a3/0ed697279f5eeb7a40f279bd783cf50e6d0b91f24120dcf66ef2cf8822b4/hf_transfer-0.1.9-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:57fd9880da1ee0f47250f735f791fab788f0aa1ee36afc49f761349869c8b4d9", size = 3388081, upload-time = "2025-01-07T10:04:57.818Z" },
+    { url = "https://files.pythonhosted.org/packages/dc/eb/47e477bdf1d784f31c7540db6cc8c354b777e51a186897a7abda34517f36/hf_transfer-0.1.9-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:5d561f0520f493c66b016d99ceabe69c23289aa90be38dd802d2aef279f15751", size = 3658654, upload-time = "2025-01-07T10:05:03.168Z" },
+    { url = "https://files.pythonhosted.org/packages/45/07/6661e43fbee09594a8a5e9bb778107d95fe38dac4c653982afe03d32bd4d/hf_transfer-0.1.9-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:a5b366d34cd449fe9b20ef25941e6eef0460a2f74e7389f02e673e1f88ebd538", size = 3690551, upload-time = "2025-01-07T10:05:09.238Z" },
+    { url = "https://files.pythonhosted.org/packages/81/f5/461d2e5f307e5048289b1168d5c642ae3bb2504e88dff1a38b92ed990a21/hf_transfer-0.1.9-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:e66acf91df4a8b72f60223059df3003062a5ae111757187ed1a06750a30e911b", size = 1393046, upload-time = "2025-01-07T10:04:51.003Z" },
+    { url = "https://files.pythonhosted.org/packages/41/ba/8d9fd9f1083525edfcb389c93738c802f3559cb749324090d7109c8bf4c2/hf_transfer-0.1.9-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:8669dbcc7a3e2e8d61d42cd24da9c50d57770bd74b445c65123291ca842a7e7a", size = 1348126, upload-time = "2025-01-07T10:04:45.712Z" },
+    { url = "https://files.pythonhosted.org/packages/8e/a2/cd7885bc9959421065a6fae0fe67b6c55becdeda4e69b873e52976f9a9f0/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8fd0167c4407a3bc4cdd0307e65ada2294ec04f1813d8a69a5243e379b22e9d8", size = 3728604, upload-time = "2025-01-07T10:04:14.173Z" },
+    { url = "https://files.pythonhosted.org/packages/f6/2e/a072cf196edfeda3310c9a5ade0a0fdd785e6154b3ce24fc738c818da2a7/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ee8b10afedcb75f71091bcc197c526a6ebf5c58bbbadb34fdeee6160f55f619f", size = 3064995, upload-time = "2025-01-07T10:04:18.663Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/84/aec9ef4c0fab93c1ea2b1badff38c78b4b2f86f0555b26d2051dbc920cde/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5828057e313de59300dd1abb489444bc452efe3f479d3c55b31a8f680936ba42", size = 3580908, upload-time = "2025-01-07T10:04:32.834Z" },
+    { url = "https://files.pythonhosted.org/packages/29/63/b560d39651a56603d64f1a0212d0472a44cbd965db2fa62b99d99cb981bf/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fc6bd19e1cc177c66bdef15ef8636ad3bde79d5a4f608c158021153b4573509d", size = 3400839, upload-time = "2025-01-07T10:04:26.122Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/d8/f87ea6f42456254b48915970ed98e993110521e9263472840174d32c880d/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdca9bfb89e6f8f281890cc61a8aff2d3cecaff7e1a4d275574d96ca70098557", size = 3552664, upload-time = "2025-01-07T10:04:40.123Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/56/1267c39b65fc8f4e2113b36297320f102718bf5799b544a6cbe22013aa1d/hf_transfer-0.1.9-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:89a23f58b7b7effbc047b8ca286f131b17728c99a9f972723323003ffd1bb916", size = 4073732, upload-time = "2025-01-07T10:04:55.624Z" },
+    { url = "https://files.pythonhosted.org/packages/82/1a/9c748befbe3decf7cb415e34f8a0c3789a0a9c55910dea73d581e48c0ce5/hf_transfer-0.1.9-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:dc7fff1345980d6c0ebb92c811d24afa4b98b3e07ed070c8e38cc91fd80478c5", size = 3390096, upload-time = "2025-01-07T10:04:59.98Z" },
+    { url = "https://files.pythonhosted.org/packages/72/85/4c03da147b6b4b7cb12e074d3d44eee28604a387ed0eaf7eaaead5069c57/hf_transfer-0.1.9-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:1a6bd16c667ebe89a069ca163060127a794fa3a3525292c900b8c8cc47985b0d", size = 3664743, upload-time = "2025-01-07T10:05:05.416Z" },
+    { url = "https://files.pythonhosted.org/packages/e7/6e/e597b04f753f1b09e6893075d53a82a30c13855cbaa791402695b01e369f/hf_transfer-0.1.9-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:d2fde99d502093ade3ab1b53f80da18480e9902aa960dab7f74fb1b9e5bc5746", size = 3695243, upload-time = "2025-01-07T10:05:11.411Z" },
+    { url = "https://files.pythonhosted.org/packages/09/89/d4e234727a26b2546c8fb70a276cd924260d60135f2165bf8b9ed67bb9a4/hf_transfer-0.1.9-cp38-abi3-win32.whl", hash = "sha256:435cc3cdc8524ce57b074032b8fd76eed70a4224d2091232fa6a8cef8fd6803e", size = 1086605, upload-time = "2025-01-07T10:05:18.873Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/14/f1e15b851d1c2af5b0b1a82bf8eb10bda2da62d98180220ba6fd8879bb5b/hf_transfer-0.1.9-cp38-abi3-win_amd64.whl", hash = "sha256:16f208fc678911c37e11aa7b586bc66a37d02e636208f18b6bc53d29b5df40ad", size = 1160240, upload-time = "2025-01-07T10:05:14.324Z" },
+]
+
 [[package]]
 name = "hf-xet"
 version = "1.1.8"
@@ -2321,7 +2723,7 @@ source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "filelock" },
     { name = "fsspec" },
-    { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" },
+    { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "packaging" },
     { name = "pyyaml" },
     { name = "requests" },
@@ -2441,6 +2843,39 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/c4/01/72d6472f80651673716d1deda2a5bbb633e563ecf94f4479da5519d69d25/interegular-0.3.3-py37-none-any.whl", hash = "sha256:b0c07007d48c89d6d19f7204972d369b2a77222722e126b6aa63aa721dc3b19c", size = 23635, upload-time = "2024-01-06T23:01:20.829Z" },
 ]
 
+[[package]]
+name = "ipython"
+version = "9.9.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "colorama", marker = "sys_platform == 'win32'" },
+    { name = "decorator" },
+    { name = "ipython-pygments-lexers" },
+    { name = "jedi" },
+    { name = "matplotlib-inline" },
+    { name = "pexpect", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
+    { name = "prompt-toolkit" },
+    { name = "pygments" },
+    { name = "stack-data" },
+    { name = "traitlets" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/46/dd/fb08d22ec0c27e73c8bc8f71810709870d51cadaf27b7ddd3f011236c100/ipython-9.9.0.tar.gz", hash = "sha256:48fbed1b2de5e2c7177eefa144aba7fcb82dac514f09b57e2ac9da34ddb54220", size = 4425043, upload-time = "2026-01-05T12:36:46.233Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/86/92/162cfaee4ccf370465c5af1ce36a9eacec1becb552f2033bb3584e6f640a/ipython-9.9.0-py3-none-any.whl", hash = "sha256:b457fe9165df2b84e8ec909a97abcf2ed88f565970efba16b1f7229c283d252b", size = 621431, upload-time = "2026-01-05T12:36:44.669Z" },
+]
+
+[[package]]
+name = "ipython-pygments-lexers"
+version = "1.1.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pygments" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/ef/4c/5dd1d8af08107f88c7f741ead7a40854b8ac24ddf9ae850afbcf698aa552/ipython_pygments_lexers-1.1.1.tar.gz", hash = "sha256:09c0138009e56b6854f9535736f4171d855c8c08a563a0dcd8022f78355c7e81", size = 8393, upload-time = "2025-01-17T11:24:34.505Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d9/33/1f075bf72b0b747cb3288d011319aaf64083cf2efef8354174e3ed4540e2/ipython_pygments_lexers-1.1.1-py3-none-any.whl", hash = "sha256:a9462224a505ade19a605f71f8fa63c2048833ce50abc86768a0d81d876dc81c", size = 8074, upload-time = "2025-01-17T11:24:33.271Z" },
+]
+
 [[package]]
 name = "itsdangerous"
 version = "2.2.0"
@@ -2450,6 +2885,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/04/96/92447566d16df59b2a776c0fb82dbc4d9e07cd95062562af01e408583fc4/itsdangerous-2.2.0-py3-none-any.whl", hash = "sha256:c6242fc49e35958c8b15141343aa660db5fc54d4f13a1db01a3f5891b98700ef", size = 16234, upload-time = "2024-04-16T21:28:14.499Z" },
 ]
 
+[[package]]
+name = "jedi"
+version = "0.19.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "parso" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/72/3a/79a912fbd4d8dd6fbb02bf69afd3bb72cf0c729bb3063c6f4498603db17a/jedi-0.19.2.tar.gz", hash = "sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0", size = 1231287, upload-time = "2024-11-11T01:41:42.873Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c0/5a/9cac0c82afec3d09ccd97c8b6502d48f165f9124db81b4bcb90b4af974ee/jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9", size = 1572278, upload-time = "2024-11-11T01:41:40.175Z" },
+]
+
 [[package]]
 name = "jinja2"
 version = "3.1.6"
@@ -2678,8 +3125,8 @@ dependencies = [
     { name = "scipy" },
     { name = "soundfile" },
     { name = "soxr" },
-    { name = "standard-aifc", marker = "python_full_version >= '3.13'" },
-    { name = "standard-sunau", marker = "python_full_version >= '3.13'" },
+    { name = "standard-aifc", marker = "python_full_version >= '3.13' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "standard-sunau", marker = "python_full_version >= '3.13' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "typing-extensions" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/64/36/360b5aafa0238e29758729e9486c6ed92a6f37fa403b7875e06c115cdf4a/librosa-0.11.0.tar.gz", hash = "sha256:f5ed951ca189b375bbe2e33b2abd7e040ceeee302b9bbaeeffdfddb8d0ace908", size = 327001, upload-time = "2025-03-11T15:09:54.884Z" }
@@ -2687,14 +3134,66 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/b5/ba/c63c5786dfee4c3417094c4b00966e61e4a63efecee22cb7b4c0387dda83/librosa-0.11.0-py3-none-any.whl", hash = "sha256:0b6415c4fd68bff4c29288abe67c6d80b587e0e1e2cfb0aad23e4559504a7fa1", size = 260749, upload-time = "2025-03-11T15:09:52.982Z" },
 ]
 
+[[package]]
+name = "librt"
+version = "0.7.7"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/b7/29/47f29026ca17f35cf299290292d5f8331f5077364974b7675a353179afa2/librt-0.7.7.tar.gz", hash = "sha256:81d957b069fed1890953c3b9c3895c7689960f233eea9a1d9607f71ce7f00b2c", size = 145910, upload-time = "2026-01-01T23:52:22.87Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/56/72/1cd9d752070011641e8aee046c851912d5f196ecd726fffa7aed2070f3e0/librt-0.7.7-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2a85a1fc4ed11ea0eb0a632459ce004a2d14afc085a50ae3463cd3dfe1ce43fc", size = 55687, upload-time = "2026-01-01T23:51:16.291Z" },
+    { url = "https://files.pythonhosted.org/packages/50/aa/d5a1d4221c4fe7e76ae1459d24d6037783cb83c7645164c07d7daf1576ec/librt-0.7.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c87654e29a35938baead1c4559858f346f4a2a7588574a14d784f300ffba0efd", size = 57136, upload-time = "2026-01-01T23:51:17.363Z" },
+    { url = "https://files.pythonhosted.org/packages/23/6f/0c86b5cb5e7ef63208c8cc22534df10ecc5278efc0d47fb8815577f3ca2f/librt-0.7.7-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c9faaebb1c6212c20afd8043cd6ed9de0a47d77f91a6b5b48f4e46ed470703fe", size = 165320, upload-time = "2026-01-01T23:51:18.455Z" },
+    { url = "https://files.pythonhosted.org/packages/16/37/df4652690c29f645ffe405b58285a4109e9fe855c5bb56e817e3e75840b3/librt-0.7.7-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1908c3e5a5ef86b23391448b47759298f87f997c3bd153a770828f58c2bb4630", size = 174216, upload-time = "2026-01-01T23:51:19.599Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/d6/d3afe071910a43133ec9c0f3e4ce99ee6df0d4e44e4bddf4b9e1c6ed41cc/librt-0.7.7-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dbc4900e95a98fc0729523be9d93a8fedebb026f32ed9ffc08acd82e3e181503", size = 189005, upload-time = "2026-01-01T23:51:21.052Z" },
+    { url = "https://files.pythonhosted.org/packages/d5/18/74060a870fe2d9fd9f47824eba6717ce7ce03124a0d1e85498e0e7efc1b2/librt-0.7.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a7ea4e1fbd253e5c68ea0fe63d08577f9d288a73f17d82f652ebc61fa48d878d", size = 183961, upload-time = "2026-01-01T23:51:22.493Z" },
+    { url = "https://files.pythonhosted.org/packages/7c/5e/918a86c66304af66a3c1d46d54df1b2d0b8894babc42a14fb6f25511497f/librt-0.7.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:ef7699b7a5a244b1119f85c5bbc13f152cd38240cbb2baa19b769433bae98e50", size = 177610, upload-time = "2026-01-01T23:51:23.874Z" },
+    { url = "https://files.pythonhosted.org/packages/b2/d7/b5e58dc2d570f162e99201b8c0151acf40a03a39c32ab824dd4febf12736/librt-0.7.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:955c62571de0b181d9e9e0a0303c8bc90d47670a5eff54cf71bf5da61d1899cf", size = 199272, upload-time = "2026-01-01T23:51:25.341Z" },
+    { url = "https://files.pythonhosted.org/packages/18/87/8202c9bd0968bdddc188ec3811985f47f58ed161b3749299f2c0dd0f63fb/librt-0.7.7-cp312-cp312-win32.whl", hash = "sha256:1bcd79be209313b270b0e1a51c67ae1af28adad0e0c7e84c3ad4b5cb57aaa75b", size = 43189, upload-time = "2026-01-01T23:51:26.799Z" },
+    { url = "https://files.pythonhosted.org/packages/61/8d/80244b267b585e7aa79ffdac19f66c4861effc3a24598e77909ecdd0850e/librt-0.7.7-cp312-cp312-win_amd64.whl", hash = "sha256:4353ee891a1834567e0302d4bd5e60f531912179578c36f3d0430f8c5e16b456", size = 49462, upload-time = "2026-01-01T23:51:27.813Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/1f/75db802d6a4992d95e8a889682601af9b49d5a13bbfa246d414eede1b56c/librt-0.7.7-cp312-cp312-win_arm64.whl", hash = "sha256:a76f1d679beccccdf8c1958e732a1dfcd6e749f8821ee59d7bec009ac308c029", size = 42828, upload-time = "2026-01-01T23:51:28.804Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/5e/d979ccb0a81407ec47c14ea68fb217ff4315521730033e1dd9faa4f3e2c1/librt-0.7.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8f4a0b0a3c86ba9193a8e23bb18f100d647bf192390ae195d84dfa0a10fb6244", size = 55746, upload-time = "2026-01-01T23:51:29.828Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/2c/3b65861fb32f802c3783d6ac66fc5589564d07452a47a8cf9980d531cad3/librt-0.7.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5335890fea9f9e6c4fdf8683061b9ccdcbe47c6dc03ab8e9b68c10acf78be78d", size = 57174, upload-time = "2026-01-01T23:51:31.226Z" },
+    { url = "https://files.pythonhosted.org/packages/50/df/030b50614b29e443607220097ebaf438531ea218c7a9a3e21ea862a919cd/librt-0.7.7-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:9b4346b1225be26def3ccc6c965751c74868f0578cbcba293c8ae9168483d811", size = 165834, upload-time = "2026-01-01T23:51:32.278Z" },
+    { url = "https://files.pythonhosted.org/packages/5d/e1/bd8d1eacacb24be26a47f157719553bbd1b3fe812c30dddf121c0436fd0b/librt-0.7.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a10b8eebdaca6e9fdbaf88b5aefc0e324b763a5f40b1266532590d5afb268a4c", size = 174819, upload-time = "2026-01-01T23:51:33.461Z" },
+    { url = "https://files.pythonhosted.org/packages/46/7d/91d6c3372acf54a019c1ad8da4c9ecf4fc27d039708880bf95f48dbe426a/librt-0.7.7-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:067be973d90d9e319e6eb4ee2a9b9307f0ecd648b8a9002fa237289a4a07a9e7", size = 189607, upload-time = "2026-01-01T23:51:34.604Z" },
+    { url = "https://files.pythonhosted.org/packages/fa/ac/44604d6d3886f791fbd1c6ae12d5a782a8f4aca927484731979f5e92c200/librt-0.7.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:23d2299ed007812cccc1ecef018db7d922733382561230de1f3954db28433977", size = 184586, upload-time = "2026-01-01T23:51:35.845Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/26/d8a6e4c17117b7f9b83301319d9a9de862ae56b133efb4bad8b3aa0808c9/librt-0.7.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:6b6f8ea465524aa4c7420c7cc4ca7d46fe00981de8debc67b1cc2e9957bb5b9d", size = 178251, upload-time = "2026-01-01T23:51:37.018Z" },
+    { url = "https://files.pythonhosted.org/packages/99/ab/98d857e254376f8e2f668e807daccc1f445e4b4fc2f6f9c1cc08866b0227/librt-0.7.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8df32a99cc46eb0ee90afd9ada113ae2cafe7e8d673686cf03ec53e49635439", size = 199853, upload-time = "2026-01-01T23:51:38.195Z" },
+    { url = "https://files.pythonhosted.org/packages/7c/55/4523210d6ae5134a5da959900be43ad8bab2e4206687b6620befddb5b5fd/librt-0.7.7-cp313-cp313-win32.whl", hash = "sha256:86f86b3b785487c7760247bcdac0b11aa8bf13245a13ed05206286135877564b", size = 43247, upload-time = "2026-01-01T23:51:39.629Z" },
+    { url = "https://files.pythonhosted.org/packages/25/40/3ec0fed5e8e9297b1cf1a3836fb589d3de55f9930e3aba988d379e8ef67c/librt-0.7.7-cp313-cp313-win_amd64.whl", hash = "sha256:4862cb2c702b1f905c0503b72d9d4daf65a7fdf5a9e84560e563471e57a56949", size = 49419, upload-time = "2026-01-01T23:51:40.674Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/7a/aab5f0fb122822e2acbc776addf8b9abfb4944a9056c00c393e46e543177/librt-0.7.7-cp313-cp313-win_arm64.whl", hash = "sha256:0996c83b1cb43c00e8c87835a284f9057bc647abd42b5871e5f941d30010c832", size = 42828, upload-time = "2026-01-01T23:51:41.731Z" },
+    { url = "https://files.pythonhosted.org/packages/69/9c/228a5c1224bd23809a635490a162e9cbdc68d99f0eeb4a696f07886b8206/librt-0.7.7-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:23daa1ab0512bafdd677eb1bfc9611d8ffbe2e328895671e64cb34166bc1b8c8", size = 55188, upload-time = "2026-01-01T23:51:43.14Z" },
+    { url = "https://files.pythonhosted.org/packages/ba/c2/0e7c6067e2b32a156308205e5728f4ed6478c501947e9142f525afbc6bd2/librt-0.7.7-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:558a9e5a6f3cc1e20b3168fb1dc802d0d8fa40731f6e9932dcc52bbcfbd37111", size = 56895, upload-time = "2026-01-01T23:51:44.534Z" },
+    { url = "https://files.pythonhosted.org/packages/0e/77/de50ff70c80855eb79d1d74035ef06f664dd073fb7fb9d9fb4429651b8eb/librt-0.7.7-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:2567cb48dc03e5b246927ab35cbb343376e24501260a9b5e30b8e255dca0d1d2", size = 163724, upload-time = "2026-01-01T23:51:45.571Z" },
+    { url = "https://files.pythonhosted.org/packages/6e/19/f8e4bf537899bdef9e0bb9f0e4b18912c2d0f858ad02091b6019864c9a6d/librt-0.7.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6066c638cdf85ff92fc6f932d2d73c93a0e03492cdfa8778e6d58c489a3d7259", size = 172470, upload-time = "2026-01-01T23:51:46.823Z" },
+    { url = "https://files.pythonhosted.org/packages/42/4c/dcc575b69d99076768e8dd6141d9aecd4234cba7f0e09217937f52edb6ed/librt-0.7.7-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a609849aca463074c17de9cda173c276eb8fee9e441053529e7b9e249dc8b8ee", size = 186806, upload-time = "2026-01-01T23:51:48.009Z" },
+    { url = "https://files.pythonhosted.org/packages/fe/f8/4094a2b7816c88de81239a83ede6e87f1138477d7ee956c30f136009eb29/librt-0.7.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:add4e0a000858fe9bb39ed55f31085506a5c38363e6eb4a1e5943a10c2bfc3d1", size = 181809, upload-time = "2026-01-01T23:51:49.35Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/ac/821b7c0ab1b5a6cd9aee7ace8309c91545a2607185101827f79122219a7e/librt-0.7.7-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a3bfe73a32bd0bdb9a87d586b05a23c0a1729205d79df66dee65bb2e40d671ba", size = 175597, upload-time = "2026-01-01T23:51:50.636Z" },
+    { url = "https://files.pythonhosted.org/packages/71/f9/27f6bfbcc764805864c04211c6ed636fe1d58f57a7b68d1f4ae5ed74e0e0/librt-0.7.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:0ecce0544d3db91a40f8b57ae26928c02130a997b540f908cefd4d279d6c5848", size = 196506, upload-time = "2026-01-01T23:51:52.535Z" },
+    { url = "https://files.pythonhosted.org/packages/46/ba/c9b9c6fc931dd7ea856c573174ccaf48714905b1a7499904db2552e3bbaf/librt-0.7.7-cp314-cp314-win32.whl", hash = "sha256:8f7a74cf3a80f0c3b0ec75b0c650b2f0a894a2cec57ef75f6f72c1e82cdac61d", size = 39747, upload-time = "2026-01-01T23:51:53.683Z" },
+    { url = "https://files.pythonhosted.org/packages/c5/69/cd1269337c4cde3ee70176ee611ab0058aa42fc8ce5c9dce55f48facfcd8/librt-0.7.7-cp314-cp314-win_amd64.whl", hash = "sha256:3d1fe2e8df3268dd6734dba33ededae72ad5c3a859b9577bc00b715759c5aaab", size = 45971, upload-time = "2026-01-01T23:51:54.697Z" },
+    { url = "https://files.pythonhosted.org/packages/79/fd/e0844794423f5583108c5991313c15e2b400995f44f6ec6871f8aaf8243c/librt-0.7.7-cp314-cp314-win_arm64.whl", hash = "sha256:2987cf827011907d3dfd109f1be0d61e173d68b1270107bb0e89f2fca7f2ed6b", size = 39075, upload-time = "2026-01-01T23:51:55.726Z" },
+    { url = "https://files.pythonhosted.org/packages/42/02/211fd8f7c381e7b2a11d0fdfcd410f409e89967be2e705983f7c6342209a/librt-0.7.7-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:8e92c8de62b40bfce91d5e12c6e8b15434da268979b1af1a6589463549d491e6", size = 57368, upload-time = "2026-01-01T23:51:56.706Z" },
+    { url = "https://files.pythonhosted.org/packages/4c/b6/aca257affae73ece26041ae76032153266d110453173f67d7603058e708c/librt-0.7.7-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f683dcd49e2494a7535e30f779aa1ad6e3732a019d80abe1309ea91ccd3230e3", size = 59238, upload-time = "2026-01-01T23:51:58.066Z" },
+    { url = "https://files.pythonhosted.org/packages/96/47/7383a507d8e0c11c78ca34c9d36eab9000db5989d446a2f05dc40e76c64f/librt-0.7.7-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:9b15e5d17812d4d629ff576699954f74e2cc24a02a4fc401882dd94f81daba45", size = 183870, upload-time = "2026-01-01T23:51:59.204Z" },
+    { url = "https://files.pythonhosted.org/packages/a4/b8/50f3d8eec8efdaf79443963624175c92cec0ba84827a66b7fcfa78598e51/librt-0.7.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c084841b879c4d9b9fa34e5d5263994f21aea7fd9c6add29194dbb41a6210536", size = 194608, upload-time = "2026-01-01T23:52:00.419Z" },
+    { url = "https://files.pythonhosted.org/packages/23/d9/1b6520793aadb59d891e3b98ee057a75de7f737e4a8b4b37fdbecb10d60f/librt-0.7.7-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:10c8fb9966f84737115513fecbaf257f9553d067a7dd45a69c2c7e5339e6a8dc", size = 206776, upload-time = "2026-01-01T23:52:01.705Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/db/331edc3bba929d2756fa335bfcf736f36eff4efcb4f2600b545a35c2ae58/librt-0.7.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:9b5fb1ecb2c35362eab2dbd354fd1efa5a8440d3e73a68be11921042a0edc0ff", size = 203206, upload-time = "2026-01-01T23:52:03.315Z" },
+    { url = "https://files.pythonhosted.org/packages/b2/e1/6af79ec77204e85f6f2294fc171a30a91bb0e35d78493532ed680f5d98be/librt-0.7.7-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:d1454899909d63cc9199a89fcc4f81bdd9004aef577d4ffc022e600c412d57f3", size = 196697, upload-time = "2026-01-01T23:52:04.857Z" },
+    { url = "https://files.pythonhosted.org/packages/f3/46/de55ecce4b2796d6d243295c221082ca3a944dc2fb3a52dcc8660ce7727d/librt-0.7.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:7ef28f2e7a016b29792fe0a2dd04dec75725b32a1264e390c366103f834a9c3a", size = 217193, upload-time = "2026-01-01T23:52:06.159Z" },
+    { url = "https://files.pythonhosted.org/packages/41/61/33063e271949787a2f8dd33c5260357e3d512a114fc82ca7890b65a76e2d/librt-0.7.7-cp314-cp314t-win32.whl", hash = "sha256:5e419e0db70991b6ba037b70c1d5bbe92b20ddf82f31ad01d77a347ed9781398", size = 40277, upload-time = "2026-01-01T23:52:07.625Z" },
+    { url = "https://files.pythonhosted.org/packages/06/21/1abd972349f83a696ea73159ac964e63e2d14086fdd9bc7ca878c25fced4/librt-0.7.7-cp314-cp314t-win_amd64.whl", hash = "sha256:d6b7d93657332c817b8d674ef6bf1ab7796b4f7ce05e420fd45bd258a72ac804", size = 46765, upload-time = "2026-01-01T23:52:08.647Z" },
+    { url = "https://files.pythonhosted.org/packages/51/0e/b756c7708143a63fca65a51ca07990fa647db2cc8fcd65177b9e96680255/librt-0.7.7-cp314-cp314t-win_arm64.whl", hash = "sha256:142c2cd91794b79fd0ce113bd658993b7ede0fe93057668c2f98a45ca00b7e91", size = 39724, upload-time = "2026-01-01T23:52:09.745Z" },
+]
+
 [[package]]
 name = "liger-kernel"
 version = "0.6.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform != 'darwin') or sys_platform == 'win32'" },
-    { name = "triton", version = "3.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux') or sys_platform == 'win32'" },
-    { name = "triton", version = "3.5.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "triton", version = "3.5.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "triton", version = "3.5.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform != 'darwin' and sys_platform != 'linux') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/31/23/be0b4dcac42d77f99406c906567cde22a7a3d71b3f3ffdfda2ac6153ec36/liger_kernel-0.6.2.tar.gz", hash = "sha256:5c5bcffffa769bc26ae838f5a4954170dd5cacde036abb1b383039f39fa5fd69", size = 3679495, upload-time = "2025-08-22T00:15:28.456Z" }
 wheels = [
@@ -2710,7 +3209,9 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/3b/33/be5acb85cd8cdc4afde33d9c234eece9f318e087920255af3c05864cd3e7/llguidance-1.3.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:f7685222660a762e481ac633d49cc559c64980fe2ee59c8f932a5bb5cbc0c2c2", size = 3220647, upload-time = "2025-10-20T19:58:42.542Z" },
     { url = "https://files.pythonhosted.org/packages/82/e6/b48bda5b15efeaeb62bd0dba8fc6a01d4ae5457a85dbb5d18632385fe15c/llguidance-1.3.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:098030ff0687261a3f1bd54cf21fe951fc861d56d37a0671250dd36677eaf224", size = 3099830, upload-time = "2025-10-20T19:58:40.826Z" },
     { url = "https://files.pythonhosted.org/packages/aa/11/44389d3d1526d7a5c38ffd587a5ebc61d7bee443ac1dea95f2089ad58f5f/llguidance-1.3.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f6caca5d78db7f76e1fbb0fff8607b861c32d47fa3d5dee2fc49de27ee269df", size = 2835242, upload-time = "2025-10-20T19:58:34.518Z" },
+    { url = "https://files.pythonhosted.org/packages/e7/ca/53ea256396405e4dee70d5a4a35e18543408e18bb16b251d6ca6b5d80310/llguidance-1.3.0-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0612bb3f034d2487b6e8f9561f02a94a6039d88273bf0c5c539a3bd3895e47d2", size = 3297480, upload-time = "2025-10-20T19:58:37.033Z" },
     { url = "https://files.pythonhosted.org/packages/83/a8/1ff2bedb8f9acb46a2d2d603415d272bb622c142ea86f5b95445cc6e366c/llguidance-1.3.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc17e9dd602c3879bf91664a64bf72f54c74dbfbeb24ccfab6a5fe435b12f7aa", size = 3033133, upload-time = "2025-10-20T19:58:38.721Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/a7/9b8086c0cfdddf3f6d47b173a404fa7ac46272f7affbee082c36740f4f1c/llguidance-1.3.0-cp39-abi3-win32.whl", hash = "sha256:2f6f558485a43e273fc5c6c974a9a3ace5d5e170076db9b40e0560e41c3ff18f", size = 2598109, upload-time = "2025-10-20T19:58:47.656Z" },
     { url = "https://files.pythonhosted.org/packages/5a/7e/809349638231f469b9056c0e1bfd924d5ef5558b3b3ec72d093b6fad33b1/llguidance-1.3.0-cp39-abi3-win_amd64.whl", hash = "sha256:1d1cd1c8618d1a13605d3e057c978651e551c8c469b481ee4041f1d6c436002d", size = 2789946, upload-time = "2025-10-20T19:58:45.958Z" },
 ]
 
@@ -2820,8 +3321,8 @@ dependencies = [
     { name = "causal-conv1d" },
     { name = "ninja" },
     { name = "packaging" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 
 [[package]]
@@ -2949,6 +3450,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/5b/60/3601f8ce6d76a7c81c7f25a0e15fde0d6b66226dd187aa6d2838e6374161/matplotlib-3.10.5-cp314-cp314t-win_arm64.whl", hash = "sha256:2efaf97d72629e74252e0b5e3c46813e9eeaa94e011ecf8084a971a31a97f40b", size = 8153849, upload-time = "2025-07-31T18:09:19.673Z" },
 ]
 
+[[package]]
+name = "matplotlib-inline"
+version = "0.2.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "traitlets" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c7/74/97e72a36efd4ae2bccb3463284300f8953f199b5ffbc04cbbb0ec78f74b1/matplotlib_inline-0.2.1.tar.gz", hash = "sha256:e1ee949c340d771fc39e241ea75683deb94762c8fa5f2927ec57c83c4dffa9fe", size = 8110, upload-time = "2025-10-23T09:00:22.126Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/af/33/ee4519fa02ed11a94aef9559552f3b17bb863f2ecfe1a35dc7f548cde231/matplotlib_inline-0.2.1-py3-none-any.whl", hash = "sha256:d56ce5156ba6085e00a9d54fead6ed29a9c47e215cd1bba2e976ef39f5710a76", size = 9516, upload-time = "2025-10-23T09:00:20.675Z" },
+]
+
 [[package]]
 name = "mdit-py-plugins"
 version = "0.5.0"
@@ -2974,6 +3487,7 @@ wheels = [
 name = "megatron-bridge"
 source = { editable = "3rdparty/Megatron-Bridge-workspace" }
 dependencies = [
+    { name = "accelerate" },
     { name = "causal-conv1d" },
     { name = "datasets" },
     { name = "flash-linear-attention" },
@@ -2999,6 +3513,7 @@ dependencies = [
 
 [package.metadata]
 requires-dist = [
+    { name = "accelerate" },
     { name = "causal-conv1d", git = "https://github.com/Dao-AILab/causal-conv1d?rev=67e0a9dfe1518fc0036444e9ab5fe06ab78299e0" },
     { name = "datasets" },
     { name = "flash-linear-attention" },
@@ -3016,8 +3531,8 @@ requires-dist = [
     { name = "tensorboard", specifier = ">=2.19.0" },
     { name = "timm" },
     { name = "tqdm", specifier = ">=4.67.1" },
-    { name = "transformer-engine", extras = ["pytorch"], specifier = ">=2.9.0a0,<2.10.0" },
-    { name = "transformers", specifier = ">=4.57.1" },
+    { name = "transformer-engine", extras = ["pytorch"], specifier = ">=2.10.0a0,<2.12.0" },
+    { name = "transformers", specifier = "<5.0.0" },
     { name = "typing-extensions" },
     { name = "wandb", specifier = ">=0.19.10" },
 ]
@@ -3028,10 +3543,13 @@ source = { editable = "3rdparty/Megatron-LM-workspace" }
 dependencies = [
     { name = "av" },
     { name = "causal-conv1d" },
+    { name = "datasets" },
     { name = "einops" },
     { name = "emerging-optimizers" },
+    { name = "fastapi" },
     { name = "flash-linear-attention" },
-    { name = "flashinfer-python" },
+    { name = "flashinfer-python", version = "0.5.2", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-fsdp' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-vllm'" },
+    { name = "flashinfer-python", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-sglang' or (extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm')" },
     { name = "mamba-ssm" },
     { name = "megatron-energon", extra = ["av-decode"] },
     { name = "multi-storage-client" },
@@ -3043,11 +3561,10 @@ dependencies = [
     { name = "onnxscript" },
     { name = "opentelemetry-api" },
     { name = "packaging" },
-    { name = "setuptools" },
-    { name = "tensorstore", version = "0.1.74", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" },
-    { name = "tensorstore", version = "0.1.76", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
+    { name = "tensorstore", version = "0.1.74", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "tensorstore", version = "0.1.76", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "tqdm" },
     { name = "transformer-engine", extra = ["pytorch"] },
     { name = "wget" },
@@ -3055,29 +3572,30 @@ dependencies = [
 
 [package.metadata]
 requires-dist = [
-    { name = "av", specifier = "<16.0.0" },
+    { name = "av" },
     { name = "causal-conv1d", git = "https://github.com/Dao-AILab/causal-conv1d?rev=67e0a9dfe1518fc0036444e9ab5fe06ab78299e0" },
+    { name = "datasets" },
     { name = "einops", specifier = "~=0.8" },
     { name = "emerging-optimizers", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.1.0" },
+    { name = "fastapi", specifier = "~=0.50" },
     { name = "flash-linear-attention", specifier = "~=0.3.2" },
-    { name = "flashinfer-python" },
+    { name = "flashinfer-python", specifier = "~=0.5.0" },
     { name = "mamba-ssm", git = "https://github.com/state-spaces/mamba.git?rev=d68d16ed7d5d5164eb5a57c0285f3b7eb8394ec1" },
     { name = "megatron-energon", extras = ["av-decode"], specifier = "~=6.0" },
     { name = "multi-storage-client", specifier = "~=0.27" },
-    { name = "numpy", specifier = "<2.0.0" },
+    { name = "numpy" },
     { name = "nv-grouped-gemm", git = "https://github.com/fanshiqing/grouped_gemm?tag=v1.1.4.post7" },
-    { name = "nvidia-modelopt", extras = ["torch"], marker = "sys_platform != 'darwin'", specifier = ">=0.33.0a0,<0.34.0" },
-    { name = "nvidia-resiliency-ext", specifier = ">=0.4.0a0,<0.5.0" },
+    { name = "nvidia-modelopt", extras = ["torch"], marker = "sys_platform != 'darwin'" },
+    { name = "nvidia-resiliency-ext" },
     { name = "nvtx", specifier = "~=0.2" },
     { name = "onnxscript" },
     { name = "opentelemetry-api", specifier = "~=1.33.1" },
     { name = "packaging", specifier = ">=24.2" },
-    { name = "setuptools", specifier = "<80.0.0" },
     { name = "tensorstore", specifier = "~=0.1,!=0.1.46,!=0.1.72" },
     { name = "torch", marker = "sys_platform != 'darwin'", index = "https://download.pytorch.org/whl/cu129" },
     { name = "torch", marker = "sys_platform == 'darwin'", index = "https://pypi.org/simple" },
     { name = "tqdm" },
-    { name = "transformer-engine", extras = ["pytorch"], specifier = ">=2.9.0a0,<2.10.0" },
+    { name = "transformer-engine", extras = ["core-cu13", "pytorch"], specifier = ">=2.9.0a0,<2.12.0" },
     { name = "wget" },
 ]
 
@@ -3093,8 +3611,8 @@ dependencies = [
     { name = "pillow" },
     { name = "pyyaml" },
     { name = "s3fs" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "tqdm" },
     { name = "webdataset" },
 ]
@@ -3120,8 +3638,8 @@ source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "numpy" },
     { name = "packaging" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/a0/be/06ada3d765ebca304e2d87873d6cf00807b43155ed57058abcd813d13a5d/megatron_fsdp-0.1.0rc1.tar.gz", hash = "sha256:4852a1c62bb95b5fc9567165ee7119f2e68bc75d6103af06bd1e6d392a50021f", size = 71600, upload-time = "2025-09-02T21:29:10.757Z" }
 wheels = [
@@ -3160,15 +3678,92 @@ name = "ml-dtypes"
 version = "0.4.1"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
-    "python_full_version >= '3.13' and sys_platform == 'win32'",
-    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version >= '3.13' and sys_platform == 'darwin'",
-    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
-]
-dependencies = [
-    { name = "numpy", marker = "python_full_version >= '3.13'" },
+    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+]
+dependencies = [
+    { name = "numpy", marker = "python_full_version >= '3.13' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/fd/15/76f86faa0902836cc133939732f7611ace68cf54148487a99c539c272dc8/ml_dtypes-0.4.1.tar.gz", hash = "sha256:fad5f2de464fd09127e49b7fd1252b9006fb43d2edc1ff112d390c324af5ca7a", size = 692594, upload-time = "2024-09-13T19:07:11.624Z" }
 wheels = [
@@ -3183,15 +3778,92 @@ name = "ml-dtypes"
 version = "0.5.3"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
-    "python_full_version < '3.13' and sys_platform == 'win32'",
-    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version < '3.13' and sys_platform == 'darwin'",
-    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
-]
-dependencies = [
-    { name = "numpy", marker = "python_full_version < '3.13'" },
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+]
+dependencies = [
+    { name = "numpy", marker = "python_full_version < '3.13' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/78/a7/aad060393123cfb383956dca68402aff3db1e1caffd5764887ed5153f41b/ml_dtypes-0.5.3.tar.gz", hash = "sha256:95ce33057ba4d05df50b1f3cfefab22e351868a843b3b15a46c65836283670c9", size = 692316, upload-time = "2025-07-29T18:39:19.454Z" }
 wheels = [
@@ -3229,7 +3901,7 @@ dependencies = [
     { name = "flask" },
     { name = "flask-cors" },
     { name = "graphene" },
-    { name = "gunicorn", marker = "sys_platform != 'win32'" },
+    { name = "gunicorn", marker = "sys_platform != 'win32' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "matplotlib" },
     { name = "mlflow-skinny" },
     { name = "mlflow-tracing" },
@@ -3239,7 +3911,7 @@ dependencies = [
     { name = "scikit-learn" },
     { name = "scipy" },
     { name = "sqlalchemy" },
-    { name = "waitress", marker = "sys_platform == 'win32'" },
+    { name = "waitress", marker = "sys_platform == 'win32' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/64/7e/516ba65bfa6f5857904ce18bcb738234004663dae1197cee082d48f1ad29/mlflow-3.5.1.tar.gz", hash = "sha256:32630f2aaadeb6dc6ccbde56247a1500518b38d0a7cc12f714be1703b6ee3ea1", size = 8300179, upload-time = "2025-10-22T18:11:47.263Z" }
 wheels = [
@@ -3306,9 +3978,11 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/f2/c9/d12ed6a8393450e28eb1f552b50200f83f138b1268b5f4e8074a76d745a2/mlx-0.28.0-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:97866d5e454e8f2d7bc42aadcbfd7565d40f4755564785e4fb964812fbad604b", size = 564160, upload-time = "2025-08-07T07:50:34.652Z" },
     { url = "https://files.pythonhosted.org/packages/71/4f/3951766a5edb75c0d2d860381f592d271b4c3b7241e730e78dd63926f5b4/mlx-0.28.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:5204ebf399439e5da374295f6c1b6961355824604eed7026c18edfe4c83e9243", size = 540098, upload-time = "2025-08-07T07:50:52.67Z" },
     { url = "https://files.pythonhosted.org/packages/f7/52/cb8eb03544eace055a500bd4a3b776a3ce48198d7b7b398e21a5a3256e89/mlx-0.28.0-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:34776bd3fe97bca7c6c76d77f6104e0d6b05b3626bb3cf9ed48d3a9bbd46c180", size = 540100, upload-time = "2025-08-07T07:50:49.095Z" },
+    { url = "https://files.pythonhosted.org/packages/fd/23/cb2703724f011d9aabc5a5a8ddf20481fdbdf251227468de89885b71f0ed/mlx-0.28.0-cp312-cp312-manylinux_2_35_x86_64.whl", hash = "sha256:ead9a6c13b704239b5ca50d68f5effc505c8f15a6017f35d8b4d0e25832e29dd", size = 632943, upload-time = "2025-08-07T07:52:35.695Z" },
     { url = "https://files.pythonhosted.org/packages/cd/fb/795f3540057642bcf3a95fe7d17c14ffaca2102511328eee6cd92d49223e/mlx-0.28.0-cp313-cp313-macosx_13_0_arm64.whl", hash = "sha256:78c88e5cc4188f538935b23803e10eaf084caa8bfeaa2a6de983038ecee3fd78", size = 564139, upload-time = "2025-08-07T07:50:31.487Z" },
     { url = "https://files.pythonhosted.org/packages/7e/4a/39609e5e3fea14c429e8a61f9754e61e4ed5289422223ad213df9116fd55/mlx-0.28.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:0b7a57a584ea5e807ec0a17c4eb179a71e01eeff9f25dff6950abad1e30443c2", size = 540205, upload-time = "2025-08-07T07:50:47.284Z" },
     { url = "https://files.pythonhosted.org/packages/43/af/738ea855df6742a4ac4ee1c72f298ff6cf50f0af7e553e89a1a41060c12c/mlx-0.28.0-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:a7cdcbd3faff45c18e9f51f95e9aa9410c71bbb4d5d86878a97eb996a0467505", size = 540201, upload-time = "2025-08-07T07:50:45.122Z" },
+    { url = "https://files.pythonhosted.org/packages/ae/23/3cb550734765e136147ac8e05851cc9b64c2397518148367ed1db346d09c/mlx-0.28.0-cp313-cp313-manylinux_2_35_x86_64.whl", hash = "sha256:91f76bea8a192b423fa2b458a4c293c6b36e2b4b7f13a15eea94f0dc6ae03f13", size = 633209, upload-time = "2025-08-07T07:53:32.143Z" },
 ]
 
 [[package]]
@@ -3353,6 +4027,22 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/9b/fc/d6034069e52003ed86f72e436b65f16084fa4d08c6b8220bc0fc85e33eab/model_hosting_container_standards-0.1.4-py3-none-any.whl", hash = "sha256:ede565ba750e812eef028804c84b8244a96fb733fcaec9a1e552568df809d841", size = 86597, upload-time = "2025-11-10T17:58:35.843Z" },
 ]
 
+[[package]]
+name = "modelscope"
+version = "1.33.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "filelock" },
+    { name = "requests" },
+    { name = "setuptools" },
+    { name = "tqdm" },
+    { name = "urllib3" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/d1/02/db35ce865e66fd212fcf0cb5b43db3a3474cf82fae8d835b56ce7dba9247/modelscope-1.33.0.tar.gz", hash = "sha256:5d9ca8eb934cabea236104ed774b3ddf352f96c705272876108aaa25a3bb0b38", size = 4558673, upload-time = "2025-12-10T03:50:01.05Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/86/05/63f01821681b2be5d1739b4aad7b186c28d4ead2c5c99a9fc4aa53c13c19/modelscope-1.33.0-py3-none-any.whl", hash = "sha256:d9bdd566303f813d762e133410007eaf1b78f065c871228ab38640919b707489", size = 6050040, upload-time = "2025-12-10T03:49:58.428Z" },
+]
+
 [[package]]
 name = "mpmath"
 version = "1.3.0"
@@ -3521,6 +4211,48 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/da/d9/f7f9379981e39b8c2511c9e0326d212accacb82f12fbfdc1aa2ce2a7b2b6/multiprocess-0.70.16-py39-none-any.whl", hash = "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3", size = 133351, upload-time = "2024-01-28T18:52:31.981Z" },
 ]
 
+[[package]]
+name = "mypy"
+version = "1.19.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "librt", marker = "platform_python_implementation != 'PyPy' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "mypy-extensions" },
+    { name = "pathspec" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/f5/db/4efed9504bc01309ab9c2da7e352cc223569f05478012b5d9ece38fd44d2/mypy-1.19.1.tar.gz", hash = "sha256:19d88bb05303fe63f71dd2c6270daca27cb9401c4ca8255fe50d1d920e0eb9ba", size = 3582404, upload-time = "2025-12-15T05:03:48.42Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/06/8a/19bfae96f6615aa8a0604915512e0289b1fad33d5909bf7244f02935d33a/mypy-1.19.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a8174a03289288c1f6c46d55cef02379b478bfbc8e358e02047487cad44c6ca1", size = 13206053, upload-time = "2025-12-15T05:03:46.622Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/34/3e63879ab041602154ba2a9f99817bb0c85c4df19a23a1443c8986e4d565/mypy-1.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ffcebe56eb09ff0c0885e750036a095e23793ba6c2e894e7e63f6d89ad51f22e", size = 12219134, upload-time = "2025-12-15T05:03:24.367Z" },
+    { url = "https://files.pythonhosted.org/packages/89/cc/2db6f0e95366b630364e09845672dbee0cbf0bbe753a204b29a944967cd9/mypy-1.19.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b64d987153888790bcdb03a6473d321820597ab8dd9243b27a92153c4fa50fd2", size = 12731616, upload-time = "2025-12-15T05:02:44.725Z" },
+    { url = "https://files.pythonhosted.org/packages/00/be/dd56c1fd4807bc1eba1cf18b2a850d0de7bacb55e158755eb79f77c41f8e/mypy-1.19.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c35d298c2c4bba75feb2195655dfea8124d855dfd7343bf8b8c055421eaf0cf8", size = 13620847, upload-time = "2025-12-15T05:03:39.633Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/42/332951aae42b79329f743bf1da088cd75d8d4d9acc18fbcbd84f26c1af4e/mypy-1.19.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:34c81968774648ab5ac09c29a375fdede03ba253f8f8287847bd480782f73a6a", size = 13834976, upload-time = "2025-12-15T05:03:08.786Z" },
+    { url = "https://files.pythonhosted.org/packages/6f/63/e7493e5f90e1e085c562bb06e2eb32cae27c5057b9653348d38b47daaecc/mypy-1.19.1-cp312-cp312-win_amd64.whl", hash = "sha256:b10e7c2cd7870ba4ad9b2d8a6102eb5ffc1f16ca35e3de6bfa390c1113029d13", size = 10118104, upload-time = "2025-12-15T05:03:10.834Z" },
+    { url = "https://files.pythonhosted.org/packages/de/9f/a6abae693f7a0c697dbb435aac52e958dc8da44e92e08ba88d2e42326176/mypy-1.19.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e3157c7594ff2ef1634ee058aafc56a82db665c9438fd41b390f3bde1ab12250", size = 13201927, upload-time = "2025-12-15T05:02:29.138Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/a4/45c35ccf6e1c65afc23a069f50e2c66f46bd3798cbe0d680c12d12935caa/mypy-1.19.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdb12f69bcc02700c2b47e070238f42cb87f18c0bc1fc4cdb4fb2bc5fd7a3b8b", size = 12206730, upload-time = "2025-12-15T05:03:01.325Z" },
+    { url = "https://files.pythonhosted.org/packages/05/bb/cdcf89678e26b187650512620eec8368fded4cfd99cfcb431e4cdfd19dec/mypy-1.19.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f859fb09d9583a985be9a493d5cfc5515b56b08f7447759a0c5deaf68d80506e", size = 12724581, upload-time = "2025-12-15T05:03:20.087Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/32/dd260d52babf67bad8e6770f8e1102021877ce0edea106e72df5626bb0ec/mypy-1.19.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9a6538e0415310aad77cb94004ca6482330fece18036b5f360b62c45814c4ef", size = 13616252, upload-time = "2025-12-15T05:02:49.036Z" },
+    { url = "https://files.pythonhosted.org/packages/71/d0/5e60a9d2e3bd48432ae2b454b7ef2b62a960ab51292b1eda2a95edd78198/mypy-1.19.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:da4869fc5e7f62a88f3fe0b5c919d1d9f7ea3cef92d3689de2823fd27e40aa75", size = 13840848, upload-time = "2025-12-15T05:02:55.95Z" },
+    { url = "https://files.pythonhosted.org/packages/98/76/d32051fa65ecf6cc8c6610956473abdc9b4c43301107476ac03559507843/mypy-1.19.1-cp313-cp313-win_amd64.whl", hash = "sha256:016f2246209095e8eda7538944daa1d60e1e8134d98983b9fc1e92c1fc0cb8dd", size = 10135510, upload-time = "2025-12-15T05:02:58.438Z" },
+    { url = "https://files.pythonhosted.org/packages/de/eb/b83e75f4c820c4247a58580ef86fcd35165028f191e7e1ba57128c52782d/mypy-1.19.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:06e6170bd5836770e8104c8fdd58e5e725cfeb309f0a6c681a811f557e97eac1", size = 13199744, upload-time = "2025-12-15T05:03:30.823Z" },
+    { url = "https://files.pythonhosted.org/packages/94/28/52785ab7bfa165f87fcbb61547a93f98bb20e7f82f90f165a1f69bce7b3d/mypy-1.19.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:804bd67b8054a85447c8954215a906d6eff9cabeabe493fb6334b24f4bfff718", size = 12215815, upload-time = "2025-12-15T05:02:42.323Z" },
+    { url = "https://files.pythonhosted.org/packages/0a/c6/bdd60774a0dbfb05122e3e925f2e9e846c009e479dcec4821dad881f5b52/mypy-1.19.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:21761006a7f497cb0d4de3d8ef4ca70532256688b0523eee02baf9eec895e27b", size = 12740047, upload-time = "2025-12-15T05:03:33.168Z" },
+    { url = "https://files.pythonhosted.org/packages/32/2a/66ba933fe6c76bd40d1fe916a83f04fed253152f451a877520b3c4a5e41e/mypy-1.19.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:28902ee51f12e0f19e1e16fbe2f8f06b6637f482c459dd393efddd0ec7f82045", size = 13601998, upload-time = "2025-12-15T05:03:13.056Z" },
+    { url = "https://files.pythonhosted.org/packages/e3/da/5055c63e377c5c2418760411fd6a63ee2b96cf95397259038756c042574f/mypy-1.19.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:481daf36a4c443332e2ae9c137dfee878fcea781a2e3f895d54bd3002a900957", size = 13807476, upload-time = "2025-12-15T05:03:17.977Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/09/4ebd873390a063176f06b0dbf1f7783dd87bd120eae7727fa4ae4179b685/mypy-1.19.1-cp314-cp314-win_amd64.whl", hash = "sha256:8bb5c6f6d043655e055be9b542aa5f3bdd30e4f3589163e85f93f3640060509f", size = 10281872, upload-time = "2025-12-15T05:03:05.549Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/f4/4ce9a05ce5ded1de3ec1c1d96cf9f9504a04e54ce0ed55cfa38619a32b8d/mypy-1.19.1-py3-none-any.whl", hash = "sha256:f1235f5ea01b7db5468d53ece6aaddf1ad0b88d9e7462b86ef96fe04995d7247", size = 2471239, upload-time = "2025-12-15T05:03:07.248Z" },
+]
+
+[[package]]
+name = "mypy-extensions"
+version = "1.1.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" },
+]
+
 [[package]]
 name = "myst-parser"
 version = "4.0.1"
@@ -3555,15 +4287,16 @@ dependencies = [
     { name = "diffusers" },
     { name = "ftfy" },
     { name = "imageio-ffmpeg" },
-    { name = "liger-kernel", marker = "platform_machine == 'x86_64' and sys_platform != 'darwin'" },
+    { name = "liger-kernel", marker = "(platform_machine == 'x86_64' and sys_platform != 'darwin') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "megatron-fsdp" },
     { name = "mlflow" },
     { name = "opencv-python-headless" },
     { name = "pybind11" },
     { name = "pyyaml" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
-    { name = "torchao" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torchao", version = "0.9.0", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-sglang'" },
+    { name = "torchao", version = "0.14.1", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-fsdp' or extra == 'extra-7-nemo-rl-mcore' or extra != 'extra-7-nemo-rl-sglang' or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "torchdata" },
     { name = "transformers" },
     { name = "wandb" },
@@ -3579,10 +4312,10 @@ all = [
     { name = "perceptron" },
     { name = "pillow" },
     { name = "qwen-omni-utils" },
-    { name = "qwen-vl-utils", extra = ["decord"], marker = "platform_machine == 'x86_64' and sys_platform != 'darwin'" },
+    { name = "qwen-vl-utils", extra = ["decord"], marker = "(platform_machine == 'x86_64' and sys_platform != 'darwin') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "sentencepiece" },
     { name = "timm" },
-    { name = "torchcodec", marker = "platform_machine == 'x86_64' and sys_platform != 'darwin'" },
+    { name = "torchcodec", marker = "(platform_machine == 'x86_64' and sys_platform != 'darwin') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 cuda = [
     { name = "flash-attn" },
@@ -3600,16 +4333,16 @@ vlm = [
     { name = "numpy" },
     { name = "pillow" },
     { name = "qwen-omni-utils" },
-    { name = "qwen-vl-utils", extra = ["decord"], marker = "platform_machine == 'x86_64' and sys_platform != 'darwin'" },
+    { name = "qwen-vl-utils", extra = ["decord"], marker = "(platform_machine == 'x86_64' and sys_platform != 'darwin') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "timm" },
-    { name = "torchcodec", marker = "platform_machine == 'x86_64' and sys_platform != 'darwin'" },
+    { name = "torchcodec", marker = "(platform_machine == 'x86_64' and sys_platform != 'darwin') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 
 [package.dev-dependencies]
 build = [
     { name = "setuptools" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 dev = [
     { name = "cut-cross-entropy" },
@@ -3698,7 +4431,7 @@ test = [
 
 [[package]]
 name = "nemo-gym"
-source = { editable = "3rdparty/Gym-workspace" }
+source = { editable = "3rdparty/Gym-workspace/Gym" }
 dependencies = [
     { name = "aiohttp" },
     { name = "datasets" },
@@ -3709,38 +4442,89 @@ dependencies = [
     { name = "mlflow" },
     { name = "omegaconf" },
     { name = "openai" },
+    { name = "orjson" },
     { name = "psutil" },
     { name = "pydantic" },
     { name = "pydantic-core" },
     { name = "ray", extra = ["default"] },
-    { name = "tdigest" },
     { name = "tqdm" },
     { name = "uvicorn" },
     { name = "uvloop" },
     { name = "yappi" },
 ]
 
+[package.optional-dependencies]
+dev = [
+    { name = "coverage" },
+    { name = "mypy" },
+    { name = "pre-commit" },
+    { name = "pytest" },
+    { name = "pytest-asyncio" },
+    { name = "pytest-cov" },
+    { name = "pytest-xdist" },
+    { name = "requests-mock" },
+    { name = "ruff" },
+]
+
+[package.dev-dependencies]
+docs = [
+    { name = "myst-parser" },
+    { name = "nvidia-sphinx-theme" },
+    { name = "sphinx" },
+    { name = "sphinx-autobuild" },
+    { name = "sphinx-autodoc2" },
+    { name = "sphinx-copybutton" },
+    { name = "sphinx-design" },
+    { name = "sphinx-reredirects" },
+    { name = "sphinxcontrib-mermaid" },
+    { name = "swagger-plugin-for-sphinx" },
+]
+
 [package.metadata]
 requires-dist = [
     { name = "aiohttp" },
+    { name = "coverage", extras = ["toml"], marker = "extra == 'dev'" },
     { name = "datasets" },
     { name = "devtools" },
     { name = "fastapi" },
     { name = "gradio" },
     { name = "hydra-core" },
     { name = "mlflow" },
+    { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.8.0" },
     { name = "omegaconf" },
     { name = "openai", specifier = "<=2.6.1" },
+    { name = "orjson" },
+    { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=3.6.0" },
     { name = "psutil" },
     { name = "pydantic" },
     { name = "pydantic-core" },
+    { name = "pytest", marker = "extra == 'dev'" },
+    { name = "pytest-asyncio", marker = "extra == 'dev'" },
+    { name = "pytest-cov", marker = "extra == 'dev'" },
+    { name = "pytest-xdist", marker = "extra == 'dev'" },
     { name = "ray", extras = ["default"] },
-    { name = "tdigest", specifier = ">=0.5.2.2" },
+    { name = "requests-mock", marker = "extra == 'dev'" },
+    { name = "ruff", marker = "extra == 'dev'" },
     { name = "tqdm" },
     { name = "uvicorn" },
     { name = "uvloop" },
     { name = "yappi" },
 ]
+provides-extras = ["dev"]
+
+[package.metadata.requires-dev]
+docs = [
+    { name = "myst-parser", specifier = ">=4.0.1" },
+    { name = "nvidia-sphinx-theme", specifier = ">=0.0.8" },
+    { name = "sphinx", specifier = ">=8.2.3" },
+    { name = "sphinx-autobuild", specifier = ">=2025.8.25" },
+    { name = "sphinx-autodoc2", specifier = ">=0.5.0" },
+    { name = "sphinx-copybutton", specifier = ">=0.5.2" },
+    { name = "sphinx-design", specifier = ">=0.6.1" },
+    { name = "sphinx-reredirects", specifier = ">=0.1.6" },
+    { name = "sphinxcontrib-mermaid", specifier = ">=1.0.0" },
+    { name = "swagger-plugin-for-sphinx", specifier = ">=6.0.0" },
+]
 
 [[package]]
 name = "nemo-rl"
@@ -3759,7 +4543,7 @@ dependencies = [
     { name = "num2words" },
     { name = "numpy" },
     { name = "nvidia-ml-py" },
-    { name = "nvidia-nvshmem-cu12", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "nvidia-nvshmem-cu12", marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "nvtx" },
     { name = "omegaconf" },
     { name = "pillow" },
@@ -3773,14 +4557,14 @@ dependencies = [
     { name = "sympy" },
     { name = "tensorboard" },
     { name = "tiktoken" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "torchdata" },
-    { name = "torchvision", version = "0.24.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" },
-    { name = "torchvision", version = "0.24.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torchvision", version = "0.24.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+    { name = "torchvision", version = "0.24.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version >= '3.15' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version >= '3.15' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_python_implementation != 'CPython' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_python_implementation != 
'CPython' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_python_implementation != 'CPython' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torchvision", version = "0.24.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torchvision", version = "0.24.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel') or (python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp') or (python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore') or (python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'darwin' and sys_platform != 'linux') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 
'darwin' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "transformers" },
-    { name = "triton", version = "3.5.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "triton", version = "3.5.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "wandb" },
 ]
 
@@ -3792,7 +4576,7 @@ automodel = [
     { name = "mamba-ssm" },
     { name = "nemo-automodel" },
     { name = "nv-grouped-gemm" },
-    { name = "transformer-engine", extra = ["pytorch"] },
+    { name = "transformer-engine", extra = ["pytorch"], marker = "extra == 'extra-7-nemo-rl-automodel' or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "vllm" },
 ]
 fsdp = [
@@ -3802,15 +4586,36 @@ fsdp = [
     { name = "vllm" },
 ]
 mcore = [
+    { name = "deep-ep" },
     { name = "flash-attn" },
     { name = "megatron-bridge" },
     { name = "megatron-core" },
-    { name = "transformer-engine", extra = ["pytorch"] },
+    { name = "transformer-engine", extra = ["pytorch"], marker = "extra == 'extra-7-nemo-rl-mcore' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "vllm" },
 ]
 nemo-gym = [
     { name = "nemo-gym" },
 ]
+sglang = [
+    { name = "compressed-tensors" },
+    { name = "einops" },
+    { name = "interegular" },
+    { name = "msgspec" },
+    { name = "openai" },
+    { name = "openai-harmony" },
+    { name = "orjson" },
+    { name = "partial-json-parser" },
+    { name = "pybase64" },
+    { name = "python-multipart" },
+    { name = "requests" },
+    { name = "sentencepiece" },
+    { name = "sgl-kernel" },
+    { name = "sglang" },
+    { name = "torch-memory-saver" },
+    { name = "torchao", version = "0.9.0", source = { registry = "https://pypi.org/simple" } },
+    { name = "uvloop" },
+    { name = "xgrammar", version = "0.1.27", source = { registry = "https://pypi.org/simple" } },
+]
 vllm = [
     { name = "cuda-python" },
     { name = "deep-ep" },
@@ -3827,8 +4632,8 @@ build = [
     { name = "psutil" },
     { name = "pybind11" },
     { name = "setuptools" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 dev = [
     { name = "pre-commit" },
@@ -3865,16 +4670,20 @@ requires-dist = [
     { name = "causal-conv1d", marker = "extra == 'automodel'", git = "https://github.com/Dao-AILab/causal-conv1d?rev=67e0a9dfe1518fc0036444e9ab5fe06ab78299e0" },
     { name = "causal-conv1d", marker = "extra == 'fsdp'", git = "https://github.com/Dao-AILab/causal-conv1d?rev=67e0a9dfe1518fc0036444e9ab5fe06ab78299e0" },
     { name = "colored", specifier = "==2.2.3" },
+    { name = "compressed-tensors", marker = "extra == 'sglang'" },
     { name = "cuda-python", marker = "extra == 'vllm'" },
     { name = "datasets", specifier = ">=4.0.0" },
     { name = "debugpy" },
     { name = "deep-ep", marker = "extra == 'automodel'", git = "https://github.com/deepseek-ai/DeepEP.git?rev=bfded34800dfec415b71503f8205181de90b2480" },
+    { name = "deep-ep", marker = "extra == 'mcore'", git = "https://github.com/deepseek-ai/DeepEP.git?rev=bfded34800dfec415b71503f8205181de90b2480" },
     { name = "deep-ep", marker = "extra == 'vllm'", git = "https://github.com/deepseek-ai/DeepEP.git?rev=bfded34800dfec415b71503f8205181de90b2480" },
     { name = "deep-gemm", marker = "extra == 'vllm'", git = "https://github.com/deepseek-ai/DeepGEMM.git?rev=7b6b5563b9d4c1ae07ffbce7f78ad3ac9204827c" },
+    { name = "einops", marker = "extra == 'sglang'" },
     { name = "flash-attn", marker = "extra == 'automodel'", specifier = "==2.8.1" },
     { name = "flash-attn", marker = "extra == 'fsdp'", specifier = "==2.8.1" },
     { name = "flash-attn", marker = "extra == 'mcore'", specifier = "==2.8.1" },
     { name = "hydra-core" },
+    { name = "interegular", marker = "extra == 'sglang'" },
     { name = "mamba-ssm", marker = "extra == 'automodel'", git = "https://github.com/state-spaces/mamba.git?rev=d68d16ed7d5d5164eb5a57c0285f3b7eb8394ec1" },
     { name = "mamba-ssm", marker = "extra == 'fsdp'", git = "https://github.com/state-spaces/mamba.git?rev=d68d16ed7d5d5164eb5a57c0285f3b7eb8394ec1" },
     { name = "math-verify" },
@@ -3882,8 +4691,9 @@ requires-dist = [
     { name = "megatron-bridge", marker = "extra == 'mcore'", editable = "3rdparty/Megatron-Bridge-workspace" },
     { name = "megatron-core", marker = "extra == 'mcore'", editable = "3rdparty/Megatron-LM-workspace" },
     { name = "mlflow", specifier = ">=3.5.0,<3.6.0" },
+    { name = "msgspec", marker = "extra == 'sglang'" },
     { name = "nemo-automodel", marker = "extra == 'automodel'", editable = "3rdparty/Automodel-workspace/Automodel" },
-    { name = "nemo-gym", marker = "extra == 'nemo-gym'", editable = "3rdparty/Gym-workspace" },
+    { name = "nemo-gym", marker = "extra == 'nemo-gym'", editable = "3rdparty/Gym-workspace/Gym" },
     { name = "ninja" },
     { name = "num2words", specifier = ">=0.5.14" },
     { name = "num2words", marker = "extra == 'vllm'", specifier = ">=0.5.14" },
@@ -3893,19 +4703,31 @@ requires-dist = [
     { name = "nvidia-nvshmem-cu12", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "nvtx" },
     { name = "omegaconf" },
+    { name = "openai", marker = "extra == 'sglang'" },
+    { name = "openai-harmony", marker = "extra == 'sglang'" },
+    { name = "orjson", marker = "extra == 'sglang'" },
+    { name = "partial-json-parser", marker = "extra == 'sglang'" },
     { name = "pillow", specifier = ">=11.3.0" },
     { name = "pip" },
     { name = "plotly" },
+    { name = "pybase64", marker = "extra == 'sglang'" },
+    { name = "python-multipart", marker = "extra == 'sglang'" },
     { name = "pyzmq" },
     { name = "ray", extras = ["default"], specifier = "==2.49.2" },
+    { name = "requests", marker = "extra == 'sglang'" },
     { name = "rich" },
+    { name = "sentencepiece", marker = "extra == 'sglang'" },
     { name = "setuptools" },
+    { name = "sgl-kernel", marker = "extra == 'sglang'", git = "https://github.com/sgl-project/sglang?subdirectory=sgl-kernel&tag=v0.5.7" },
+    { name = "sglang", marker = "extra == 'sglang'", specifier = "==0.5.7" },
     { name = "swanlab" },
     { name = "sympy", specifier = ">=1.14.0" },
     { name = "tensorboard" },
     { name = "tiktoken" },
     { name = "torch", marker = "sys_platform != 'darwin'", specifier = "==2.9.0", index = "https://download.pytorch.org/whl/cu129" },
     { name = "torch", marker = "sys_platform == 'darwin'", specifier = "==2.9.0", index = "https://pypi.org/simple" },
+    { name = "torch-memory-saver", marker = "extra == 'sglang'" },
+    { name = "torchao", marker = "extra == 'sglang'" },
     { name = "torchdata" },
     { name = "torchvision", marker = "sys_platform != 'darwin'", specifier = ">=0.22.0", index = "https://download.pytorch.org/whl/cu129" },
     { name = "torchvision", marker = "sys_platform == 'darwin'", specifier = ">=0.22.0", index = "https://pypi.org/simple" },
@@ -3913,13 +4735,15 @@ requires-dist = [
     { name = "transformer-engine", extras = ["pytorch"], marker = "extra == 'mcore'", specifier = "==2.8.0" },
     { name = "transformers", specifier = "==4.57.1" },
     { name = "triton", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')", index = "https://download.pytorch.org/whl/cu129" },
+    { name = "uvloop", marker = "extra == 'sglang'" },
     { name = "vllm", marker = "extra == 'automodel'", specifier = "==0.11.2" },
     { name = "vllm", marker = "extra == 'fsdp'", specifier = "==0.11.2" },
     { name = "vllm", marker = "extra == 'mcore'", specifier = "==0.11.2" },
     { name = "vllm", marker = "extra == 'vllm'", specifier = "==0.11.2" },
     { name = "wandb" },
+    { name = "xgrammar", marker = "extra == 'sglang'" },
 ]
-provides-extras = ["fsdp", "automodel", "vllm", "mcore", "nemo-gym"]
+provides-extras = ["fsdp", "automodel", "vllm", "sglang", "mcore", "nemo-gym"]
 
 [package.metadata.requires-dev]
 build = [
@@ -3960,6 +4784,15 @@ test = [
     { name = "pytest-timeout" },
 ]
 
+[[package]]
+name = "nest-asyncio"
+version = "1.6.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/83/f8/51569ac65d696c8ecbee95938f89d4abf00f47d58d48f6fbabfe8f0baefe/nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe", size = 7418, upload-time = "2024-01-21T14:25:19.227Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a0/c4/c2971a3ba4c6103a3d10c4b0f24f461ddc027f0f09763220cf35ca1401b3/nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c", size = 5195, upload-time = "2024-01-21T14:25:17.223Z" },
+]
+
 [[package]]
 name = "networkx"
 version = "3.5"
@@ -4061,8 +4894,8 @@ source = { git = "https://github.com/fanshiqing/grouped_gemm?tag=v1.1.4.post7#6d
 dependencies = [
     { name = "numpy" },
     { name = "setuptools" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "wheel" },
 ]
 
@@ -4073,6 +4906,7 @@ source = { registry = "https://pypi.org/simple" }
 wheels = [
     { url = "https://files.pythonhosted.org/packages/82/6c/90d3f532f608a03a13c1d6c16c266ffa3828e8011b1549d3b61db2ad59f5/nvidia_cublas_cu12-12.9.1.4-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:7a950dae01add3b415a5a5cdc4ec818fb5858263e9cca59004bb99fdbbd3a5d6", size = 575006342, upload-time = "2025-06-05T20:04:16.902Z" },
     { url = "https://files.pythonhosted.org/packages/77/3c/aa88abe01f3be3d1f8f787d1d33dc83e76fec05945f9a28fbb41cfb99cd5/nvidia_cublas_cu12-12.9.1.4-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:453611eb21a7c1f2c2156ed9f3a45b691deda0440ec550860290dc901af5b4c2", size = 581242350, upload-time = "2025-06-05T20:04:51.979Z" },
+    { url = "https://files.pythonhosted.org/packages/45/a1/a17fade6567c57452cfc8f967a40d1035bb9301db52f27808167fbb2be2f/nvidia_cublas_cu12-12.9.1.4-py3-none-win_amd64.whl", hash = "sha256:1e5fee10662e6e52bd71dec533fbbd4971bb70a5f24f3bc3793e5c2e9dc640bf", size = 553153899, upload-time = "2025-06-05T20:13:35.556Z" },
 ]
 
 [[package]]
@@ -4082,6 +4916,7 @@ source = { registry = "https://pypi.org/simple" }
 wheels = [
     { url = "https://files.pythonhosted.org/packages/b4/78/351b5c8cdbd9a6b4fb0d6ee73fb176dcdc1b6b6ad47c2ffff5ae8ca4a1f7/nvidia_cuda_cupti_cu12-12.9.79-py3-none-manylinux_2_25_aarch64.whl", hash = "sha256:791853b030602c6a11d08b5578edfb957cadea06e9d3b26adbf8d036135a4afe", size = 10077166, upload-time = "2025-06-05T20:01:01.385Z" },
     { url = "https://files.pythonhosted.org/packages/c1/2e/b84e32197e33f39907b455b83395a017e697c07a449a2b15fd07fc1c9981/nvidia_cuda_cupti_cu12-12.9.79-py3-none-manylinux_2_25_x86_64.whl", hash = "sha256:096bcf334f13e1984ba36685ad4c1d6347db214de03dbb6eebb237b41d9d934f", size = 10814997, upload-time = "2025-06-05T20:01:10.168Z" },
+    { url = "https://files.pythonhosted.org/packages/3b/b4/298983ab1a83de500f77d0add86d16d63b19d1a82c59f8eaf04f90445703/nvidia_cuda_cupti_cu12-12.9.79-py3-none-win_amd64.whl", hash = "sha256:1848a9380067560d5bee10ed240eecc22991713e672c0515f9c3d9396adf93c8", size = 7730496, upload-time = "2025-06-05T20:11:26.444Z" },
 ]
 
 [[package]]
@@ -4091,6 +4926,7 @@ source = { registry = "https://pypi.org/simple" }
 wheels = [
     { url = "https://files.pythonhosted.org/packages/b8/85/e4af82cc9202023862090bfca4ea827d533329e925c758f0cde964cb54b7/nvidia_cuda_nvrtc_cu12-12.9.86-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:210cf05005a447e29214e9ce50851e83fc5f4358df8b453155d5e1918094dcb4", size = 89568129, upload-time = "2025-06-05T20:02:41.973Z" },
     { url = "https://files.pythonhosted.org/packages/64/eb/c2295044b8f3b3b08860e2f6a912b702fc92568a167259df5dddb78f325e/nvidia_cuda_nvrtc_cu12-12.9.86-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:096d4de6bda726415dfaf3198d4f5c522b8e70139c97feef5cd2ca6d4cd9cead", size = 44528905, upload-time = "2025-06-05T20:02:29.754Z" },
+    { url = "https://files.pythonhosted.org/packages/52/de/823919be3b9d0ccbf1f784035423c5f18f4267fb0123558d58b813c6ec86/nvidia_cuda_nvrtc_cu12-12.9.86-py3-none-win_amd64.whl", hash = "sha256:72972ebdcf504d69462d3bcd67e7b81edd25d0fb85a2c46d3ea3517666636349", size = 76408187, upload-time = "2025-06-05T20:12:27.819Z" },
 ]
 
 [[package]]
@@ -4100,6 +4936,7 @@ source = { registry = "https://pypi.org/simple" }
 wheels = [
     { url = "https://files.pythonhosted.org/packages/bc/e0/0279bd94539fda525e0c8538db29b72a5a8495b0c12173113471d28bce78/nvidia_cuda_runtime_cu12-12.9.79-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:83469a846206f2a733db0c42e223589ab62fd2fabac4432d2f8802de4bded0a4", size = 3515012, upload-time = "2025-06-05T20:00:35.519Z" },
     { url = "https://files.pythonhosted.org/packages/bc/46/a92db19b8309581092a3add7e6fceb4c301a3fd233969856a8cbf042cd3c/nvidia_cuda_runtime_cu12-12.9.79-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:25bba2dfb01d48a9b59ca474a1ac43c6ebf7011f1b0b8cc44f54eb6ac48a96c3", size = 3493179, upload-time = "2025-06-05T20:00:53.735Z" },
+    { url = "https://files.pythonhosted.org/packages/59/df/e7c3a360be4f7b93cee39271b792669baeb3846c58a4df6dfcf187a7ffab/nvidia_cuda_runtime_cu12-12.9.79-py3-none-win_amd64.whl", hash = "sha256:8e018af8fa02363876860388bd10ccb89eb9ab8fb0aa749aaf58430a9f7c4891", size = 3591604, upload-time = "2025-06-05T20:11:17.036Z" },
 ]
 
 [[package]]
@@ -4107,11 +4944,12 @@ name = "nvidia-cudnn-cu12"
 version = "9.10.2.21"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" },
+    { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 wheels = [
     { url = "https://files.pythonhosted.org/packages/fa/41/e79269ce215c857c935fd86bcfe91a451a584dfc27f1e068f568b9ad1ab7/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8", size = 705026878, upload-time = "2025-06-06T21:52:51.348Z" },
     { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" },
+    { url = "https://files.pythonhosted.org/packages/3d/90/0bd6e586701b3a890fd38aa71c387dab4883d619d6e5ad912ccbd05bfd67/nvidia_cudnn_cu12-9.10.2.21-py3-none-win_amd64.whl", hash = "sha256:c6288de7d63e6cf62988f0923f96dc339cea362decb1bf5b3141883392a7d65e", size = 692992268, upload-time = "2025-06-06T21:55:18.114Z" },
 ]
 
 [[package]]
@@ -4132,11 +4970,12 @@ name = "nvidia-cufft-cu12"
 version = "11.4.1.4"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" },
+    { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 wheels = [
     { url = "https://files.pythonhosted.org/packages/9b/2b/76445b0af890da61b501fde30650a1a4bd910607261b209cccb5235d3daa/nvidia_cufft_cu12-11.4.1.4-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1a28c9b12260a1aa7a8fd12f5ebd82d027963d635ba82ff39a1acfa7c4c0fbcf", size = 200822453, upload-time = "2025-06-05T20:05:27.889Z" },
     { url = "https://files.pythonhosted.org/packages/95/f4/61e6996dd20481ee834f57a8e9dca28b1869366a135e0d42e2aa8493bdd4/nvidia_cufft_cu12-11.4.1.4-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c67884f2a7d276b4b80eb56a79322a95df592ae5e765cf1243693365ccab4e28", size = 200877592, upload-time = "2025-06-05T20:05:45.862Z" },
+    { url = "https://files.pythonhosted.org/packages/20/ee/29955203338515b940bd4f60ffdbc073428f25ef9bfbce44c9a066aedc5c/nvidia_cufft_cu12-11.4.1.4-py3-none-win_amd64.whl", hash = "sha256:8e5bfaac795e93f80611f807d42844e8e27e340e0cde270dcb6c65386d795b80", size = 200067309, upload-time = "2025-06-05T20:13:59.762Z" },
 ]
 
 [[package]]
@@ -4155,6 +4994,7 @@ source = { registry = "https://pypi.org/simple" }
 wheels = [
     { url = "https://files.pythonhosted.org/packages/14/1c/2a45afc614d99558d4a773fa740d8bb5471c8398eeed925fc0fcba020173/nvidia_curand_cu12-10.3.10.19-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:de663377feb1697e1d30ed587b07d5721fdd6d2015c738d7528a6002a6134d37", size = 68292066, upload-time = "2025-05-01T19:39:13.595Z" },
     { url = "https://files.pythonhosted.org/packages/31/44/193a0e171750ca9f8320626e8a1f2381e4077a65e69e2fb9708bd479e34a/nvidia_curand_cu12-10.3.10.19-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:49b274db4780d421bd2ccd362e1415c13887c53c214f0d4b761752b8f9f6aa1e", size = 68295626, upload-time = "2025-05-01T19:39:38.885Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/98/1bd66fd09cbe1a5920cb36ba87029d511db7cca93979e635fd431ad3b6c0/nvidia_curand_cu12-10.3.10.19-py3-none-win_amd64.whl", hash = "sha256:e8129e6ac40dc123bd948e33d3e11b4aa617d87a583fa2f21b3210e90c743cde", size = 68774847, upload-time = "2025-05-01T19:48:52.93Z" },
 ]
 
 [[package]]
@@ -4162,13 +5002,14 @@ name = "nvidia-cusolver-cu12"
 version = "11.7.5.82"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" },
-    { name = "nvidia-cusparse-cu12", marker = "sys_platform == 'linux'" },
-    { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" },
+    { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "nvidia-cusparse-cu12", marker = "sys_platform == 'linux' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 wheels = [
     { url = "https://files.pythonhosted.org/packages/03/99/686ff9bf3a82a531c62b1a5c614476e8dfa24a9d89067aeedf3592ee4538/nvidia_cusolver_cu12-11.7.5.82-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:62efa83e4ace59a4c734d052bb72158e888aa7b770e1a5f601682f16fe5b4fd2", size = 337869834, upload-time = "2025-06-05T20:06:53.125Z" },
     { url = "https://files.pythonhosted.org/packages/33/40/79b0c64d44d6c166c0964ec1d803d067f4a145cca23e23925fd351d0e642/nvidia_cusolver_cu12-11.7.5.82-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:15da72d1340d29b5b3cf3fd100e3cd53421dde36002eda6ed93811af63c40d88", size = 338117415, upload-time = "2025-06-05T20:07:16.809Z" },
+    { url = "https://files.pythonhosted.org/packages/32/5d/feb7f86b809f89b14193beffebe24cf2e4bf7af08372ab8cdd34d19a65a0/nvidia_cusolver_cu12-11.7.5.82-py3-none-win_amd64.whl", hash = "sha256:77666337237716783c6269a658dea310195cddbd80a5b2919b1ba8735cec8efd", size = 326215953, upload-time = "2025-06-05T20:14:41.76Z" },
 ]
 
 [[package]]
@@ -4176,11 +5017,12 @@ name = "nvidia-cusparse-cu12"
 version = "12.5.10.65"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" },
+    { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 wheels = [
     { url = "https://files.pythonhosted.org/packages/5e/6f/8710fbd17cdd1d0fc3fea7d36d5b65ce1933611c31e1861da330206b253a/nvidia_cusparse_cu12-12.5.10.65-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:221c73e7482dd93eda44e65ce567c031c07e2f93f6fa0ecd3ba876a195023e83", size = 366359408, upload-time = "2025-06-05T20:07:42.501Z" },
     { url = "https://files.pythonhosted.org/packages/12/46/b0fd4b04f86577921feb97d8e2cf028afe04f614d17fb5013de9282c9216/nvidia_cusparse_cu12-12.5.10.65-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:73060ce019ac064a057267c585bf1fd5a353734151f87472ff02b2c5c9984e78", size = 366465088, upload-time = "2025-06-05T20:08:20.413Z" },
+    { url = "https://files.pythonhosted.org/packages/73/ef/063500c25670fbd1cbb0cd3eb7c8a061585b53adb4dd8bf3492bb49b0df3/nvidia_cusparse_cu12-12.5.10.65-py3-none-win_amd64.whl", hash = "sha256:9e487468a22a1eaf1fbd1d2035936a905feb79c4ce5c2f67626764ee4f90227c", size = 362504719, upload-time = "2025-06-05T20:15:17.947Z" },
 ]
 
 [[package]]
@@ -4190,17 +5032,204 @@ source = { registry = "https://pypi.org/simple" }
 wheels = [
     { url = "https://files.pythonhosted.org/packages/73/b9/598f6ff36faaece4b3c50d26f50e38661499ff34346f00e057760b35cc9d/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8878dce784d0fac90131b6817b607e803c36e629ba34dc5b433471382196b6a5", size = 283835557, upload-time = "2025-02-26T00:16:54.265Z" },
     { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" },
+    { url = "https://files.pythonhosted.org/packages/2f/d8/a6b0d0d0c2435e9310f3e2bb0d9c9dd4c33daef86aa5f30b3681defd37ea/nvidia_cusparselt_cu12-0.7.1-py3-none-win_amd64.whl", hash = "sha256:f67fbb5831940ec829c9117b7f33807db9f9678dc2a617fbe781cac17b4e1075", size = 271020911, upload-time = "2025-02-26T00:14:47.204Z" },
 ]
 
 [[package]]
 name = "nvidia-cutlass-dsl"
-version = "4.3.1"
+version = "4.2.1"
 source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "cuda-python" },
-    { name = "numpy" },
-    { name = "typing-extensions" },
-]
+resolution-markers = [
+    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin'",
+    "python_full_version < '3.13' and sys_platform == 'darwin'",
+]
+dependencies = [
+    { name = "cuda-python", marker = "extra == 'extra-7-nemo-rl-sglang'" },
+    { name = "numpy", marker = "extra == 'extra-7-nemo-rl-sglang'" },
+    { name = "typing-extensions", marker = "extra == 'extra-7-nemo-rl-sglang'" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/1e/1d/f168a3dbd8570e5dbbe0deca217d7b374c977b4a4970ebadf3b6d0f1174f/nvidia_cutlass_dsl-4.2.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:10ace6e2005cb0bc04d158c7660f8ec104ab29aeffb26f1ed3bb0b5a577ccc34", size = 58535504, upload-time = "2025-09-23T14:38:29.028Z" },
+    { url = "https://files.pythonhosted.org/packages/02/ab/5bcc0c8c620af5d4acbc71abce10e3eb3023e50342e6bc29b6461f72530e/nvidia_cutlass_dsl-4.2.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:d7ddc9c1f5bb803718d736c907fac857fc606f1fce630c0b1d741935a72723b9", size = 62230361, upload-time = "2025-09-23T14:40:18.156Z" },
+    { url = "https://files.pythonhosted.org/packages/cf/d5/9b79faaec3fa12c52b7de1e727af94c54184b00f280c79b667ab045550db/nvidia_cutlass_dsl-4.2.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:c0985124a74ba435e1f756aa78e89f64c6d01e4f54de1d5a5d218ebbc1c92eff", size = 58535424, upload-time = "2025-09-23T14:37:33.064Z" },
+    { url = "https://files.pythonhosted.org/packages/43/86/78c8cd3fa1a684f3976535d7ac69e54f4ede165b5abca7979fd0820f74f2/nvidia_cutlass_dsl-4.2.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:9356604afc8f62aac46634b3a12baf8cb3f3a6f2e44e398dcfe6ec98ff1a8d1b", size = 62230122, upload-time = "2025-09-23T14:40:46.621Z" },
+]
+
+[[package]]
+name = "nvidia-cutlass-dsl"
+version = "4.3.1"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+]
+dependencies = [
+    { name = "cuda-python", marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-fsdp' or extra == 'extra-7-nemo-rl-mcore' or extra != 'extra-7-nemo-rl-sglang' or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "numpy", marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-fsdp' or extra == 'extra-7-nemo-rl-mcore' or extra != 'extra-7-nemo-rl-sglang' or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "typing-extensions", marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-fsdp' or extra == 'extra-7-nemo-rl-mcore' or extra != 'extra-7-nemo-rl-sglang' or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+]
 wheels = [
     { url = "https://files.pythonhosted.org/packages/b2/c5/f1586c64fcf569b890da776d08a32836a3ef2450cbe9e3ac2971dbecbcce/nvidia_cutlass_dsl-4.3.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:025a8c7a0fb80626e2a893954ea19b2e1ece8d131078c7da12b7fabc2634d04d", size = 58726236, upload-time = "2025-11-28T00:59:29.376Z" },
     { url = "https://files.pythonhosted.org/packages/dc/5b/fe6a2db1688a690a94f8ad03706fa6db2055d82fab0c4fab764e8c89640f/nvidia_cutlass_dsl-4.3.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b95ce5633e09f12c8d1fcd30c5db06b8325d41b3da0875d3e8a4c110ed5b5cdf", size = 58591826, upload-time = "2025-11-28T01:00:19.559Z" },
@@ -4232,8 +5261,8 @@ dependencies = [
     { name = "rich" },
     { name = "safetensors" },
     { name = "scipy" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "torchprofile" },
     { name = "tqdm" },
 ]
@@ -4257,6 +5286,7 @@ source = { registry = "https://pypi.org/simple" }
 wheels = [
     { url = "https://files.pythonhosted.org/packages/46/0c/c75bbfb967457a0b7670b8ad267bfc4fffdf341c074e0a80db06c24ccfd4/nvidia_nvjitlink_cu12-12.9.86-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:e3f1171dbdc83c5932a45f0f4c99180a70de9bd2718c1ab77d14104f6d7147f9", size = 39748338, upload-time = "2025-06-05T20:10:25.613Z" },
     { url = "https://files.pythonhosted.org/packages/97/bc/2dcba8e70cf3115b400fef54f213bcd6715a3195eba000f8330f11e40c45/nvidia_nvjitlink_cu12-12.9.86-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:994a05ef08ef4b0b299829cde613a424382aff7efb08a7172c1fa616cc3af2ca", size = 39514880, upload-time = "2025-06-05T20:10:04.89Z" },
+    { url = "https://files.pythonhosted.org/packages/dd/7e/2eecb277d8a98184d881fb98a738363fd4f14577a4d2d7f8264266e82623/nvidia_nvjitlink_cu12-12.9.86-py3-none-win_amd64.whl", hash = "sha256:cc6fcec260ca843c10e34c936921a1c426b351753587fdd638e8cff7b16bb9db", size = 35584936, upload-time = "2025-06-05T20:16:08.525Z" },
 ]
 
 [[package]]
@@ -4275,6 +5305,7 @@ source = { registry = "https://pypi.org/simple" }
 wheels = [
     { url = "https://files.pythonhosted.org/packages/86/ed/bb230dce7741f2778ba2ae3e8778fdb8bc58eee9fd95f07bf7b2d18e8081/nvidia_nvtx_cu12-12.9.79-py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:fec150986817f2b4e7eed72ed059f2dcb9ba3856b9a96134e448eac946a6952f", size = 85504, upload-time = "2025-06-05T20:03:10.21Z" },
     { url = "https://files.pythonhosted.org/packages/c4/e4/82155e4aaedb41621087ba219c95e99c5e417f37a7649b4fb6ec32dcb14d/nvidia_nvtx_cu12-12.9.79-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d1f258e752294acdb4f61c3d31fee87bd0f60e459f1e2f624376369b524cd15d", size = 86120, upload-time = "2025-06-05T20:02:51.838Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/cc/efd28e4b3f4019f7ef176f4baa5c1ef7dcd3ac8c9e6d2b15bcbf3f1297d3/nvidia_nvtx_cu12-12.9.79-py3-none-win_amd64.whl", hash = "sha256:1f504e573b3a955e55aae6c747e2ae561b63fdcafcd591e43d18dae9875504f8", size = 77774, upload-time = "2025-06-05T20:12:39.44Z" },
 ]
 
 [[package]]
@@ -4288,8 +5319,8 @@ dependencies = [
     { name = "psutil" },
     { name = "pynvml" },
     { name = "pyyaml" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 wheels = [
     { url = "https://files.pythonhosted.org/packages/70/05/38d491962273c7905708762279f440520eb79f3c00b67a023497215ad023/nvidia_resiliency_ext-0.4.1-cp312-cp312-manylinux_2_31_aarch64.whl", hash = "sha256:b3bd5f01535574b16d0f38bca6e39afe3806c4a2896eee1b321cd944e00025a7", size = 444570, upload-time = "2025-07-17T03:50:58.877Z" },
@@ -4367,8 +5398,8 @@ name = "onnx-ir"
 version = "0.1.7"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "ml-dtypes", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" },
-    { name = "ml-dtypes", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" },
+    { name = "ml-dtypes", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "ml-dtypes", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "numpy" },
     { name = "onnx" },
     { name = "typing-extensions" },
@@ -4383,8 +5414,8 @@ name = "onnxscript"
 version = "0.4.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "ml-dtypes", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" },
-    { name = "ml-dtypes", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" },
+    { name = "ml-dtypes", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "ml-dtypes", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "numpy" },
     { name = "onnx" },
     { name = "onnx-ir" },
@@ -4406,11 +5437,11 @@ dependencies = [
     { name = "regex" },
     { name = "safetensors" },
     { name = "timm" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
-    { name = "torchvision", version = "0.24.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" },
-    { name = "torchvision", version = "0.24.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torchvision", version = "0.24.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torchvision", version = "0.24.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version >= '3.15' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version >= '3.15' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_python_implementation != 'CPython' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_python_implementation != 
'CPython' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_python_implementation != 'CPython' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torchvision", version = "0.24.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torchvision", version = "0.24.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel') or (python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp') or (python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore') or (python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'darwin' and sys_platform != 'linux') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 
'darwin' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "tqdm" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/30/46/fb8be250fa7fcfc56fbeb41583645e18d868268f67fbbbeb8ed62a8ff18a/open_clip_torch-3.2.0.tar.gz", hash = "sha256:62b7743012ccc40fb7c64819fa762fba0a13dd74585ac733babe58c2974c2506", size = 1502853, upload-time = "2025-09-21T17:32:08.289Z" }
@@ -4420,7 +5451,7 @@ wheels = [
 
 [[package]]
 name = "openai"
-version = "2.6.0"
+version = "2.6.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
@@ -4432,9 +5463,9 @@ dependencies = [
     { name = "tqdm" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/ee/c7/e42bcd89dfd47fec8a30b9e20f93e512efdbfbb3391b05bbb79a2fb295fa/openai-2.6.0.tar.gz", hash = "sha256:f119faf7fc07d7e558c1e7c32c873e241439b01bd7480418234291ee8c8f4b9d", size = 592904, upload-time = "2025-10-20T17:17:24.588Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c4/44/303deb97be7c1c9b53118b52825cbd1557aeeff510f3a52566b1fa66f6a2/openai-2.6.1.tar.gz", hash = "sha256:27ae704d190615fca0c0fc2b796a38f8b5879645a3a52c9c453b23f97141bb49", size = 593043, upload-time = "2025-10-24T13:29:52.79Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/c0/0a/58e9dcd34abe273eaeac3807a8483073767b5609d01bb78ea2f048e515a0/openai-2.6.0-py3-none-any.whl", hash = "sha256:f33fa12070fe347b5787a7861c8dd397786a4a17e1c3186e239338dac7e2e743", size = 1005403, upload-time = "2025-10-20T17:17:22.091Z" },
+    { url = "https://files.pythonhosted.org/packages/15/0e/331df43df633e6105ff9cf45e0ce57762bd126a45ac16b25a43f6738d8a2/openai-2.6.1-py3-none-any.whl", hash = "sha256:904e4b5254a8416746a2f05649594fa41b19d799843cd134dac86167e094edef", size = 1005551, upload-time = "2025-10-24T13:29:50.973Z" },
 ]
 
 [[package]]
@@ -4616,10 +5647,79 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/28/01/d6b274a0635be0468d4dbd9cafe80c47105937a0d42434e805e67cd2ed8b/orjson-3.11.3-cp314-cp314-win_arm64.whl", hash = "sha256:e8f6a7a27d7b7bec81bd5924163e9af03d49bbb63013f107b48eb5d16db711bc", size = 125985, upload-time = "2025-08-26T17:46:16.67Z" },
 ]
 
+[[package]]
+name = "outlines"
+version = "0.1.11"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "airportsdata" },
+    { name = "cloudpickle" },
+    { name = "diskcache" },
+    { name = "interegular" },
+    { name = "jinja2" },
+    { name = "jsonschema" },
+    { name = "lark" },
+    { name = "nest-asyncio" },
+    { name = "numpy" },
+    { name = "outlines-core", version = "0.1.26", source = { registry = "https://pypi.org/simple" } },
+    { name = "pycountry" },
+    { name = "pydantic" },
+    { name = "referencing" },
+    { name = "requests" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "tqdm" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/ac/d0/d59ae830bf7026425942899e3d48e77b58a713cff946a695e5405808da1b/outlines-0.1.11.tar.gz", hash = "sha256:0997bd9da1cc050e430bd08995dc7d4bd855918bafa4531e49d3f37110a23aba", size = 2488858, upload-time = "2024-12-13T07:24:08.426Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/13/b4/99ea4a122bef60e3fd6402d19665aff1f928e0daf8fac3044d0b73f72003/outlines-0.1.11-py3-none-any.whl", hash = "sha256:f5a5f2242ed9802d3aab7a92789bf4008d734c576be9258cc0a297f690124727", size = 87623, upload-time = "2024-12-13T07:24:05.817Z" },
+]
+
+[[package]]
+name = "outlines-core"
+version = "0.1.26"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin'",
+    "python_full_version < '3.13' and sys_platform == 'darwin'",
+]
+dependencies = [
+    { name = "interegular" },
+    { name = "jsonschema" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/d3/f3/274d07f4702728b43581235a77e545ec602b25f9b0098b288a0f3052521d/outlines_core-0.1.26.tar.gz", hash = "sha256:481c4301341e77cc8f1832d616784adb4d461b4fec65878e7c0d2cba7163a189", size = 75139, upload-time = "2024-12-12T23:38:50.703Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c6/86/0fb40746e579db38d89f127122a3900d9e0350f76aae8cb61adeaff44cc2/outlines_core-0.1.26-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f54633bca50055d42ea4d94ae06dcbe52d3d76a9b621b75723b1177d0d952953", size = 321874, upload-time = "2024-12-12T23:38:26.834Z" },
+    { url = "https://files.pythonhosted.org/packages/ab/0c/b91f7bc03843796c1d643ee030b6cd8fd5a8ba2cd4856c855f140c878976/outlines_core-0.1.26-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9525321b48700dcaaabf60bcdc951e45f9357ba3fb3e1bfc81b662d7d4170e7c", size = 301995, upload-time = "2024-12-12T23:38:29.625Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/db/fa91a2d54288b900de82d86eda3adb2417b3b5b2db6256854a5e8bc85c32/outlines_core-0.1.26-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00f409f72c11f6ffadb57066950dd384d5388015028c1a1a615c9a64988dae3e", size = 321050, upload-time = "2024-12-12T23:38:32.274Z" },
+    { url = "https://files.pythonhosted.org/packages/e2/1d/a36292b6198986bd9c3ff8c24355deb82ed5475403379ee40b5b5473e2e3/outlines_core-0.1.26-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e86a1bb46adc5cbf6dfd7a7fe4105e0e2a4c6e041732a053126b41c521a1f223", size = 343201, upload-time = "2024-12-12T23:38:34.631Z" },
+    { url = "https://files.pythonhosted.org/packages/08/63/5dd2b5a364412f674b6edcb59b0c21513bdb07cdcc7613b064c1a0660d01/outlines_core-0.1.26-cp312-cp312-win32.whl", hash = "sha256:19f462f6b00935708677ad27cb4df55e0e17f6ffe713ab750f5f2683b090f95d", size = 233970, upload-time = "2024-12-12T23:38:37.318Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/56/8adf0b7446d1e975c2314454813c59eb7b195889908a2932ed34148c113c/outlines_core-0.1.26-cp312-cp312-win_amd64.whl", hash = "sha256:9b36bff12779e58883747116893a17b3551bbd10865878b951b03a44d112229a", size = 243578, upload-time = "2024-12-12T23:38:39.964Z" },
+]
+
 [[package]]
 name = "outlines-core"
 version = "0.2.11"
 source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin'",
+    "python_full_version < '3.13' and sys_platform == 'darwin'",
+]
 sdist = { url = "https://files.pythonhosted.org/packages/1a/d3/e04e9145f8f806723dec9b9e5227ad695a3efcd3ced7794cf7c22b15df5e/outlines_core-0.2.11.tar.gz", hash = "sha256:dfce56f717ff5083e54cbcfdb66cad243365437fccbb5509adaa7e31e030f1d8", size = 197263, upload-time = "2025-05-19T10:12:51.719Z" }
 wheels = [
     { url = "https://files.pythonhosted.org/packages/5f/2c/c7636823244c70e2960060bf9bd978248dffb55c5e7c91c46d18354b2a24/outlines_core-0.2.11-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:4a9db4872bae083631d720994f4cee603bce0536b33d5a988814576863b657cf", size = 1957668, upload-time = "2025-05-19T10:12:18.29Z" },
@@ -4683,6 +5783,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/d5/f9/07086f5b0f2a19872554abeea7658200824f5835c58a106fa8f2ae96a46c/pandas-2.3.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5db9637dbc24b631ff3707269ae4559bce4b7fd75c1c4d7e13f40edc42df4444", size = 13189044, upload-time = "2025-07-07T19:19:39.999Z" },
 ]
 
+[[package]]
+name = "parso"
+version = "0.8.5"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d4/de/53e0bcf53d13e005bd8c92e7855142494f41171b34c2536b86187474184d/parso-0.8.5.tar.gz", hash = "sha256:034d7354a9a018bdce352f48b2a8a450f05e9d6ee85db84764e9b6bd96dafe5a", size = 401205, upload-time = "2025-08-23T15:15:28.028Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/16/32/f8e3c85d1d5250232a5d3477a2a28cc291968ff175caeadaf3cc19ce0e4a/parso-0.8.5-py2.py3-none-any.whl", hash = "sha256:646204b5ee239c396d040b90f9e272e9a8017c630092bf59980beb62fd033887", size = 106668, upload-time = "2025-08-23T15:15:25.663Z" },
+]
+
 [[package]]
 name = "partial-json-parser"
 version = "0.2.1.1.post6"
@@ -4713,8 +5822,8 @@ dependencies = [
     { name = "psutil" },
     { name = "pyyaml" },
     { name = "safetensors" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "tqdm" },
     { name = "transformers" },
 ]
@@ -4725,20 +5834,33 @@ wheels = [
 
 [[package]]
 name = "perceptron"
-version = "0.1.4"
+version = "0.2.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "colorama" },
     { name = "httpx", extra = ["http2"] },
     { name = "numpy" },
     { name = "pillow" },
+    { name = "pydantic" },
     { name = "rich" },
     { name = "shellingham" },
     { name = "typer" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/30/60/85db2243d8b550823603d8f9c5845b0dd0f01074e9aabf0b2af0c4f52565/perceptron-0.1.4.tar.gz", hash = "sha256:62fd190efb74925e2cc33c0cd38761e19959be3bdb7b24fbf9e3386d6961f690", size = 78116, upload-time = "2025-11-12T20:00:28.024Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c6/ff/87efbc3988094e09eb29261d545c84cd0a21376daa997435f5566281e2d2/perceptron-0.2.0.tar.gz", hash = "sha256:369ff3078ba7ac9e3b5f30d9f75ff44d72991b64c94f93c5267e751552cab3f6", size = 87447, upload-time = "2026-01-14T23:42:37.713Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/ef/17/b7cb1a10ebb0a9a4c9fbcd96a28b43d44e08a90f620bab07e644a658d2f1/perceptron-0.1.4-py3-none-any.whl", hash = "sha256:f490a6df6c15167e91e1a528601cae98ce99a30991cf792f9ef83ebc15d335c4", size = 57421, upload-time = "2025-11-12T20:00:26.395Z" },
+    { url = "https://files.pythonhosted.org/packages/8b/83/983a6663a7814c0772eabdf3f2e616758abd50a244dfbd770785c9c2ab95/perceptron-0.2.0-py3-none-any.whl", hash = "sha256:7dc7713778b797f3cb013406eb507ae729ca360347dba8196e82361134a436e8", size = 61076, upload-time = "2026-01-14T23:42:36.525Z" },
+]
+
+[[package]]
+name = "pexpect"
+version = "4.9.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "ptyprocess" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/42/92/cc564bf6381ff43ce1f4d06852fc19a2f11d180f23dc32d9588bee2f149d/pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f", size = 166450, upload-time = "2023-11-25T09:07:26.339Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", size = 63772, upload-time = "2023-11-25T06:56:14.81Z" },
 ]
 
 [[package]]
@@ -4911,6 +6033,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/27/72/0824c18f3bc75810f55dacc2dd933f6ec829771180245ae3cc976195dec0/prometheus_fastapi_instrumentator-7.1.0-py3-none-any.whl", hash = "sha256:978130f3c0bb7b8ebcc90d35516a6fe13e02d2eb358c8f83887cdef7020c31e9", size = 19296, upload-time = "2025-03-19T19:35:04.323Z" },
 ]
 
+[[package]]
+name = "prompt-toolkit"
+version = "3.0.52"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "wcwidth" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/a1/96/06e01a7b38dce6fe1db213e061a4602dd6032a8a97ef6c1a862537732421/prompt_toolkit-3.0.52.tar.gz", hash = "sha256:28cde192929c8e7321de85de1ddbe736f1375148b02f2e17edd840042b1be855", size = 434198, upload-time = "2025-08-27T15:24:02.057Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/84/03/0d3ce49e2505ae70cf43bc5bb3033955d2fc9f932163e84dc0779cc47f48/prompt_toolkit-3.0.52-py3-none-any.whl", hash = "sha256:9aac639a3bbd33284347de5ad8d68ecc044b91a762dc39b7c21095fcd6a19955", size = 391431, upload-time = "2025-08-27T15:23:59.498Z" },
+]
+
 [[package]]
 name = "propcache"
 version = "0.3.2"
@@ -5009,6 +6143,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/50/1b/6921afe68c74868b4c9fa424dad3be35b095e16687989ebbb50ce4fceb7c/psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553", size = 244885, upload-time = "2025-02-13T21:54:37.486Z" },
 ]
 
+[[package]]
+name = "ptyprocess"
+version = "0.7.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/20/e5/16ff212c1e452235a90aeb09066144d0c5a6a8c0834397e03f5224495c4e/ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220", size = 70762, upload-time = "2020-12-28T15:15:30.155Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/22/a6/858897256d0deac81a172289110f31629fc4cee19b6f01283303e18c8db3/ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", size = 13993, upload-time = "2020-12-28T15:15:28.35Z" },
+]
+
 [[package]]
 name = "pulp"
 version = "3.2.2"
@@ -5018,6 +6161,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/15/8d/a6a9d58c929a869f7f1b99b3d37b3f14ef63e2826eef581416338d686c3f/pulp-3.2.2-py3-none-any.whl", hash = "sha256:d3ca5ff11a28b3e7b2508a992d7e51f3533471d89305f0560b5fe3b6cc821043", size = 16385354, upload-time = "2025-07-29T11:42:01.829Z" },
 ]
 
+[[package]]
+name = "pure-eval"
+version = "0.2.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/cd/05/0a34433a064256a578f1783a10da6df098ceaa4a57bbeaa96a6c0352786b/pure_eval-0.2.3.tar.gz", hash = "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42", size = 19752, upload-time = "2024-07-21T12:58:21.801Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 11842, upload-time = "2024-07-21T12:58:20.04Z" },
+]
+
 [[package]]
 name = "py-cpuinfo"
 version = "9.0.0"
@@ -5073,11 +6225,11 @@ wheels = [
 
 [[package]]
 name = "pyasn1"
-version = "0.6.1"
+version = "0.6.2"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/ba/e9/01f1a64245b89f039897cb0130016d79f77d52669aae6ee7b159a6c4c018/pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034", size = 145322, upload-time = "2024-09-10T22:41:42.55Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/fe/b6/6e630dff89739fcd427e3f72b3d905ce0acb85a45d4ec3e2678718a3487f/pyasn1-0.6.2.tar.gz", hash = "sha256:9b59a2b25ba7e4f8197db7686c09fb33e658b98339fadb826e9512629017833b", size = 146586, upload-time = "2026-01-16T18:04:18.534Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/c8/f1/d6a797abb14f6283c0ddff96bbdd46937f64122b8c925cab503dd37f8214/pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629", size = 83135, upload-time = "2024-09-11T16:00:36.122Z" },
+    { url = "https://files.pythonhosted.org/packages/44/b5/a96872e5184f354da9c84ae119971a0a4c221fe9b27a4d94bd43f2596727/pyasn1-0.6.2-py3-none-any.whl", hash = "sha256:1eb26d860996a18e9b6ed05e7aae0e9fc21619fcee6af91cca9bad4fbea224bf", size = 83371, upload-time = "2026-01-16T18:04:17.174Z" },
 ]
 
 [[package]]
@@ -5440,6 +6592,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/05/e7/df2285f3d08fee213f2d041540fa4fc9ca6c2d44cf36d3a035bf2a8d2bcc/pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf", size = 111120, upload-time = "2025-03-25T05:01:24.908Z" },
 ]
 
+[[package]]
+name = "pyproject-hooks"
+version = "1.2.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e7/82/28175b2414effca1cdac8dc99f76d660e7a4fb0ceefa4b4ab8f5f6742925/pyproject_hooks-1.2.0.tar.gz", hash = "sha256:1e859bd5c40fae9448642dd871adf459e5e2084186e8d2c2a79a824c970da1f8", size = 19228, upload-time = "2024-09-29T09:24:13.293Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/bd/24/12818598c362d7f300f18e74db45963dbcb85150324092410c8b49405e42/pyproject_hooks-1.2.0-py3-none-any.whl", hash = "sha256:9e5c6bfa8dcc30091c74b0cf803c81fdd29d94f01992a7707bc97babb1141913", size = 10216, upload-time = "2024-09-29T09:24:11.978Z" },
+]
+
 [[package]]
 name = "pyrefly"
 version = "0.24.2"
@@ -5461,7 +6622,7 @@ name = "pytest"
 version = "8.4.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "colorama", marker = "sys_platform == 'win32'" },
+    { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "iniconfig" },
     { name = "packaging" },
     { name = "pluggy" },
@@ -5523,6 +6684,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/fa/b6/3127540ecdf1464a00e5a01ee60a1b09175f6913f0644ac748494d9c4b21/pytest_timeout-2.4.0-py3-none-any.whl", hash = "sha256:c42667e5cdadb151aeb5b26d114aff6bdf5a907f176a007a30b940d3d865b5c2", size = 14382, upload-time = "2025-05-05T19:44:33.502Z" },
 ]
 
+[[package]]
+name = "pytest-xdist"
+version = "3.8.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "execnet" },
+    { name = "pytest" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/78/b4/439b179d1ff526791eb921115fca8e44e596a13efeda518b9d845a619450/pytest_xdist-3.8.0.tar.gz", hash = "sha256:7e578125ec9bc6050861aa93f2d59f1d8d085595d6551c2c90b6f4fad8d3a9f1", size = 88069, upload-time = "2025-07-01T13:30:59.346Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ca/31/d4e37e9e550c2b92a9cbc2e4d0b7420a27224968580b5a447f420847c975/pytest_xdist-3.8.0-py3-none-any.whl", hash = "sha256:202ca578cfeb7370784a8c33d6d05bc6e13b4f25b5053c30a152269fd10f0b88", size = 46396, upload-time = "2025-07-01T13:30:56.632Z" },
+]
+
 [[package]]
 name = "python-dateutil"
 version = "2.9.0.post0"
@@ -5571,12 +6745,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" },
 ]
 
-[[package]]
-name = "pyudorandom"
-version = "1.0.0"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/13/14/6fc20ea903eda547d6a255e995f8d4a09fdc3cf8bfacb6f85e6d669bc259/pyudorandom-1.0.0.tar.gz", hash = "sha256:f30a093a0170c15f9c7f87eb29f71f0f5fde995528b7c6dc4606d389e8c37755", size = 1599, upload-time = "2016-07-18T16:18:56.037Z" }
-
 [[package]]
 name = "pywin32"
 version = "311"
@@ -5624,7 +6792,7 @@ name = "pyzmq"
 version = "27.0.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "cffi", marker = "implementation_name == 'pypy'" },
+    { name = "cffi", marker = "implementation_name == 'pypy' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/30/5f/557d2032a2f471edbcc227da724c24a1c05887b5cda1e3ae53af98b9e0a5/pyzmq-27.0.1.tar.gz", hash = "sha256:45c549204bc20e7484ffd2555f6cf02e572440ecf2f3bdd60d4404b20fddf64b", size = 281158, upload-time = "2025-08-03T05:05:40.352Z" }
 wheels = [
@@ -5695,7 +6863,7 @@ wheels = [
 
 [package.optional-dependencies]
 decord = [
-    { name = "decord", marker = "(platform_machine != 'aarch64' and sys_platform != 'darwin') or sys_platform == 'win32'" },
+    { name = "decord", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 
 [[package]]
@@ -5752,7 +6920,7 @@ source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "attrs" },
     { name = "rpds-py" },
-    { name = "typing-extensions", marker = "python_full_version < '3.13'" },
+    { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/2f/db/98b5c277be99dd18bfd91dd04e1b759cad18d1a338188c936e92f921c7e2/referencing-0.36.2.tar.gz", hash = "sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa", size = 74744, upload-time = "2025-01-25T08:48:16.138Z" }
 wheels = [
@@ -5824,6 +6992,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" },
 ]
 
+[[package]]
+name = "requests-mock"
+version = "1.12.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "requests" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/92/32/587625f91f9a0a3d84688bf9cfc4b2480a7e8ec327cefd0ff2ac891fd2cf/requests-mock-1.12.1.tar.gz", hash = "sha256:e9e12e333b525156e82a3c852f22016b9158220d2f47454de9cae8a77d371401", size = 60901, upload-time = "2024-03-29T03:54:29.446Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/97/ec/889fbc557727da0c34a33850950310240f2040f3b1955175fdb2b36a8910/requests_mock-1.12.1-py2.py3-none-any.whl", hash = "sha256:b1e37054004cdd5e56c84454cc7df12b25f90f382159087f4b6915aaeef39563", size = 27695, upload-time = "2024-03-29T03:54:27.64Z" },
+]
+
 [[package]]
 name = "rich"
 version = "13.9.4"
@@ -6084,6 +7264,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/2c/c3/c0be1135726618dc1e28d181b8c442403d8dbb9e273fd791de2d4384bcdd/safetensors-0.6.2-cp38-abi3-win_amd64.whl", hash = "sha256:c7b214870df923cbc1593c3faee16bec59ea462758699bd3fee399d00aac072c", size = 320192, upload-time = "2025-08-08T13:13:59.467Z" },
 ]
 
+[[package]]
+name = "scikit-build-core"
+version = "0.11.6"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "packaging" },
+    { name = "pathspec" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/48/b2/c11aaa746f3dfcdb46499affbc5f9784c991d354a80ca92f96a0f0f5aadf/scikit_build_core-0.11.6.tar.gz", hash = "sha256:5982ccd839735be99cfd3b92a8847c6c196692f476c215da84b79d2ad12f9f1b", size = 286006, upload-time = "2025-08-22T22:11:56.112Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/43/49/ec16b3db6893db788ae35f98506ff5a9c25dca7eb18cc38ada8a4c1dc944/scikit_build_core-0.11.6-py3-none-any.whl", hash = "sha256:ce6d8fe64e6b4c759ea0fb95d2f8a68f60d2df31c2989838633b8ec930736360", size = 185764, upload-time = "2025-08-22T22:11:52.438Z" },
+]
+
 [[package]]
 name = "scikit-learn"
 version = "1.7.1"
@@ -6299,11 +7492,98 @@ wheels = [
 
 [[package]]
 name = "setuptools"
-version = "79.0.1"
+version = "80.10.2"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/bb/71/b6365e6325b3290e14957b2c3a804a529968c77a049b2ed40c095f749707/setuptools-79.0.1.tar.gz", hash = "sha256:128ce7b8f33c3079fd1b067ecbb4051a66e8526e7b65f6cec075dfc650ddfa88", size = 1367909, upload-time = "2025-04-23T22:20:59.241Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/76/95/faf61eb8363f26aa7e1d762267a8d602a1b26d4f3a1e758e92cb3cb8b054/setuptools-80.10.2.tar.gz", hash = "sha256:8b0e9d10c784bf7d262c4e5ec5d4ec94127ce206e8738f29a437945fbc219b70", size = 1200343, upload-time = "2026-01-25T22:38:17.252Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/0d/6d/b4752b044bf94cb802d88a888dc7d288baaf77d7910b7dedda74b5ceea0c/setuptools-79.0.1-py3-none-any.whl", hash = "sha256:e147c0549f27767ba362f9da434eab9c5dc0045d5304feb602a0af001089fc51", size = 1256281, upload-time = "2025-04-23T22:20:56.768Z" },
+    { url = "https://files.pythonhosted.org/packages/94/b8/f1f62a5e3c0ad2ff1d189590bfa4c46b4f3b6e49cef6f26c6ee4e575394d/setuptools-80.10.2-py3-none-any.whl", hash = "sha256:95b30ddfb717250edb492926c92b5221f7ef3fbcc2b07579bcd4a27da21d0173", size = 1064234, upload-time = "2026-01-25T22:38:15.216Z" },
+]
+
+[[package]]
+name = "sgl-kernel"
+version = "0.3.20"
+source = { git = "https://github.com/sgl-project/sglang?subdirectory=sgl-kernel&tag=v0.5.7#232982a0dee4f0f9545189a7d9b6b9bb802e4910" }
+dependencies = [
+    { name = "scikit-build-core" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "wheel" },
+]
+
+[[package]]
+name = "sglang"
+version = "0.5.7"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "aiohttp" },
+    { name = "anthropic" },
+    { name = "av", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" },
+    { name = "blobfile" },
+    { name = "build" },
+    { name = "compressed-tensors" },
+    { name = "cuda-python" },
+    { name = "datasets" },
+    { name = "decord2" },
+    { name = "einops" },
+    { name = "fastapi" },
+    { name = "flashinfer-cubin" },
+    { name = "flashinfer-python", version = "0.5.3", source = { registry = "https://pypi.org/simple" } },
+    { name = "gguf" },
+    { name = "grpcio" },
+    { name = "grpcio-health-checking" },
+    { name = "grpcio-reflection" },
+    { name = "grpcio-tools" },
+    { name = "hf-transfer" },
+    { name = "huggingface-hub" },
+    { name = "interegular" },
+    { name = "ipython" },
+    { name = "llguidance" },
+    { name = "modelscope" },
+    { name = "msgspec" },
+    { name = "ninja" },
+    { name = "numpy" },
+    { name = "nvidia-cutlass-dsl", version = "4.2.1", source = { registry = "https://pypi.org/simple" } },
+    { name = "nvidia-ml-py" },
+    { name = "openai" },
+    { name = "openai-harmony" },
+    { name = "orjson" },
+    { name = "outlines" },
+    { name = "packaging" },
+    { name = "partial-json-parser" },
+    { name = "pillow" },
+    { name = "prometheus-client" },
+    { name = "psutil" },
+    { name = "py-spy" },
+    { name = "pybase64" },
+    { name = "pydantic" },
+    { name = "python-multipart" },
+    { name = "pyzmq" },
+    { name = "requests" },
+    { name = "scipy" },
+    { name = "sentencepiece" },
+    { name = "setproctitle" },
+    { name = "sgl-kernel" },
+    { name = "soundfile" },
+    { name = "tiktoken" },
+    { name = "timm" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch-memory-saver" },
+    { name = "torchao", version = "0.9.0", source = { registry = "https://pypi.org/simple" } },
+    { name = "torchaudio" },
+    { name = "torchcodec", marker = "(platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l') or sys_platform != 'linux'" },
+    { name = "torchvision", version = "0.24.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torchvision", version = "0.24.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torchvision", version = "0.24.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "tqdm" },
+    { name = "transformers" },
+    { name = "uvicorn" },
+    { name = "uvloop" },
+    { name = "xgrammar", version = "0.1.27", source = { registry = "https://pypi.org/simple" } },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/d7/bd/1943cac907b2aa575853bf9d2a95c315caf3473ec6edd826e96d7e3adf7d/sglang-0.5.7.tar.gz", hash = "sha256:930e00658128016838d14dddb4527a0948d512cd1f265d465de98d32414b89ed", size = 3097441, upload-time = "2026-01-01T03:01:13.975Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/20/7a/51dd4cec4258905c18d2d61d925c6b8703d3bdf8eafeb6484b35273ed932/sglang-0.5.7-py3-none-any.whl", hash = "sha256:b77f9e5ca5a2ab19b3efba725958fc4de20ba97880e383738c695e8611b51100", size = 4036880, upload-time = "2026-01-01T03:01:11.852Z" },
 ]
 
 [[package]]
@@ -6463,7 +7743,7 @@ source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "alabaster" },
     { name = "babel" },
-    { name = "colorama", marker = "sys_platform == 'win32'" },
+    { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "docutils" },
     { name = "imagesize" },
     { name = "jinja2" },
@@ -6486,7 +7766,7 @@ wheels = [
 
 [[package]]
 name = "sphinx-autobuild"
-version = "2024.10.3"
+version = "2025.8.25"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "colorama" },
@@ -6496,9 +7776,9 @@ dependencies = [
     { name = "watchfiles" },
     { name = "websockets" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/a5/2c/155e1de2c1ba96a72e5dba152c509a8b41e047ee5c2def9e9f0d812f8be7/sphinx_autobuild-2024.10.3.tar.gz", hash = "sha256:248150f8f333e825107b6d4b86113ab28fa51750e5f9ae63b59dc339be951fb1", size = 14023, upload-time = "2024-10-02T23:15:30.172Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/e0/3c/a59a3a453d4133777f7ed2e83c80b7dc817d43c74b74298ca0af869662ad/sphinx_autobuild-2025.8.25.tar.gz", hash = "sha256:9cf5aab32853c8c31af572e4fecdc09c997e2b8be5a07daf2a389e270e85b213", size = 15200, upload-time = "2025-08-25T18:44:55.436Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/18/c0/eba125db38c84d3c74717008fd3cb5000b68cd7e2cbafd1349c6a38c3d3b/sphinx_autobuild-2024.10.3-py3-none-any.whl", hash = "sha256:158e16c36f9d633e613c9aaf81c19b0fc458ca78b112533b20dafcda430d60fa", size = 11908, upload-time = "2024-10-02T23:15:28.739Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/20/56411b52f917696995f5ad27d2ea7e9492c84a043c5b49a3a3173573cd93/sphinx_autobuild-2025.8.25-py3-none-any.whl", hash = "sha256:b750ac7d5a18603e4665294323fd20f6dcc0a984117026d1986704fa68f0379a", size = 12535, upload-time = "2025-08-25T18:44:54.164Z" },
 ]
 
 [[package]]
@@ -6538,6 +7818,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/c6/43/65c0acbd8cc6f50195a3a1fc195c404988b15c67090e73c7a41a9f57d6bd/sphinx_design-0.6.1-py3-none-any.whl", hash = "sha256:b11f37db1a802a183d61b159d9a202314d4d2fe29c163437001324fe2f19549c", size = 2215338, upload-time = "2024-08-02T13:48:42.106Z" },
 ]
 
+[[package]]
+name = "sphinx-reredirects"
+version = "1.1.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "sphinx" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/1b/8d/0e39fe2740d7d71417edf9a6424aa80ca2c27c17fc21282cdc39f90d5a40/sphinx_reredirects-1.1.0.tar.gz", hash = "sha256:fb9b195335ab14b43f8273287d0c7eeb637ba6c56c66581c11b47202f6718b29", size = 614624, upload-time = "2025-12-22T08:28:02.792Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/51/81/b5dd07067f3daac6d23687ec737b2d593740671ebcd145830c8f92d381c5/sphinx_reredirects-1.1.0-py3-none-any.whl", hash = "sha256:4b5692273c72cd2d4d917f4c6f87d5919e4d6114a752d4be033f7f5f6310efd9", size = 6351, upload-time = "2025-12-22T08:27:59.724Z" },
+]
+
 [[package]]
 name = "sphinxcontrib-applehelp"
 version = "2.0.0"
@@ -6610,7 +7902,7 @@ name = "sqlalchemy"
 version = "2.0.43"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "greenlet", marker = "(python_full_version < '3.14' and platform_machine == 'AMD64') or (python_full_version < '3.14' and platform_machine == 'WIN32') or (python_full_version < '3.14' and platform_machine == 'aarch64') or (python_full_version < '3.14' and platform_machine == 'amd64') or (python_full_version < '3.14' and platform_machine == 'ppc64le') or (python_full_version < '3.14' and platform_machine == 'win32') or (python_full_version < '3.14' and platform_machine == 'x86_64')" },
+    { name = "greenlet", marker = "(python_full_version < '3.14' and platform_machine == 'AMD64') or (python_full_version < '3.14' and platform_machine == 'WIN32') or (python_full_version < '3.14' and platform_machine == 'aarch64') or (python_full_version < '3.14' and platform_machine == 'amd64') or (python_full_version < '3.14' and platform_machine == 'ppc64le') or (python_full_version < '3.14' and platform_machine == 'win32') or (python_full_version < '3.14' and platform_machine == 'x86_64') or (platform_machine != 'AMD64' and platform_machine != 'WIN32' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'ppc64le' and platform_machine != 'win32' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'AMD64' and platform_machine != 'WIN32' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'ppc64le' and platform_machine != 'win32' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'AMD64' and platform_machine != 'WIN32' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'ppc64le' and platform_machine != 'win32' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'AMD64' and platform_machine != 'WIN32' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'ppc64le' and platform_machine != 'win32' and platform_machine != 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'AMD64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'AMD64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'AMD64' and extra == 
'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'AMD64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'WIN32' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'WIN32' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'WIN32' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'WIN32' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'amd64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'amd64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'amd64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'amd64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'ppc64le' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'ppc64le' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'ppc64le' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'ppc64le' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'win32' and extra == 'extra-7-nemo-rl-automodel' and 
extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'win32' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'win32' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'win32' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'x86_64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "typing-extensions" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/d7/bc/d59b5d97d27229b0e009bd9098cd81af71c2fa5549c580a0a67b9bed0496/sqlalchemy-2.0.43.tar.gz", hash = "sha256:788bfcef6787a7764169cfe9859fe425bf44559619e1d9f56f5bddf2ebf6f417", size = 9762949, upload-time = "2025-08-11T14:24:58.438Z" }
@@ -6643,13 +7935,27 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/a9/5c/bfd6bd0bf979426d405cc6e71eceb8701b148b16c21d2dc3c261efc61c7b/sqlparse-0.5.3-py3-none-any.whl", hash = "sha256:cf2196ed3418f3ba5de6af7e82c694a9fbdbfecccdfc72e281548517081f16ca", size = 44415, upload-time = "2024-12-10T12:05:27.824Z" },
 ]
 
+[[package]]
+name = "stack-data"
+version = "0.6.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "asttokens" },
+    { name = "executing" },
+    { name = "pure-eval" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/28/e3/55dcc2cfbc3ca9c29519eb6884dd1415ecb53b0e934862d3559ddcb7e20b/stack_data-0.6.3.tar.gz", hash = "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9", size = 44707, upload-time = "2023-09-30T13:58:05.479Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f1/7b/ce1eafaf1a76852e2ec9b22edecf1daa58175c090266e9f6c64afcd81d91/stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695", size = 24521, upload-time = "2023-09-30T13:58:03.53Z" },
+]
+
 [[package]]
 name = "standard-aifc"
 version = "3.13.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "audioop-lts", marker = "python_full_version >= '3.13'" },
-    { name = "standard-chunk", marker = "python_full_version >= '3.13'" },
+    { name = "audioop-lts", marker = "python_full_version >= '3.13' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "standard-chunk", marker = "python_full_version >= '3.13' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/c4/53/6050dc3dde1671eb3db592c13b55a8005e5040131f7509cef0215212cb84/standard_aifc-3.13.0.tar.gz", hash = "sha256:64e249c7cb4b3daf2fdba4e95721f811bde8bdfc43ad9f936589b7bb2fae2e43", size = 15240, upload-time = "2024-10-30T16:01:31.772Z" }
 wheels = [
@@ -6670,7 +7976,7 @@ name = "standard-sunau"
 version = "3.13.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "audioop-lts", marker = "python_full_version >= '3.13'" },
+    { name = "audioop-lts", marker = "python_full_version >= '3.13' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/66/e3/ce8d38cb2d70e05ffeddc28bb09bad77cfef979eb0a299c9117f7ed4e6a9/standard_sunau-3.13.0.tar.gz", hash = "sha256:b319a1ac95a09a2378a8442f403c66f4fd4b36616d6df6ae82b8e536ee790908", size = 9368, upload-time = "2024-10-30T16:01:41.626Z" }
 wheels = [
@@ -6683,7 +7989,7 @@ version = "0.50.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
-    { name = "typing-extensions", marker = "python_full_version < '3.13'" },
+    { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/ba/b8/73a0e6a6e079a9d9cfa64113d771e421640b6f679a52eeb9b32f72d871a1/starlette-0.50.0.tar.gz", hash = "sha256:a2a17b22203254bcbc2e1f926d2d55f3f9497f769416b3190768befe598fa3ca", size = 2646985, upload-time = "2025-11-01T15:25:27.516Z" }
 wheels = [
@@ -6765,20 +8071,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" },
 ]
 
-[[package]]
-name = "tdigest"
-version = "0.5.2.2"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "accumulation-tree" },
-    { name = "pyudorandom" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/dd/34/7e2f78d1ed0af7d0039ab2cff45b6bf8512234b9f178bb21713084a1f2f0/tdigest-0.5.2.2.tar.gz", hash = "sha256:8deffc8bac024761786f43d9444e3b6c91008cd690323e051f068820a7364d0e", size = 6549, upload-time = "2019-05-07T18:57:40.771Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/32/72/f420480118cbdd18eb761b9936f0a927957130659a638449575b4a4f0aa7/tdigest-0.5.2.2-py2.py3-none-any.whl", hash = "sha256:e32ff6ab62e4defdb93b816c831080d94dfa1efb68a9fa1e7976c237fa9375cb", size = 9445, upload-time = "2019-05-07T18:57:37.493Z" },
-    { url = "https://files.pythonhosted.org/packages/b4/94/fd3853b98f39d10206b08f2737d2ec2dc6f46a42dc7b7e05f4f0162d13ee/tdigest-0.5.2.2-py3-none-any.whl", hash = "sha256:dd25f8d6e6be002192bba9e4b8c16491d36c10b389f50637818603d1f67c6fb2", size = 9440, upload-time = "2019-05-07T18:57:38.942Z" },
-]
-
 [[package]]
 name = "template-project"
 version = "0.1.0"
@@ -6839,16 +8131,93 @@ name = "tensorstore"
 version = "0.1.74"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
-    "python_full_version >= '3.13' and sys_platform == 'win32'",
-    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version >= '3.13' and sys_platform == 'darwin'",
-    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
-]
-dependencies = [
-    { name = "ml-dtypes", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" },
-    { name = "numpy", marker = "python_full_version >= '3.13'" },
+    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+]
+dependencies = [
+    { name = "ml-dtypes", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "numpy", marker = "python_full_version >= '3.13' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/3c/b9/ea25aba62c688a87d7d7d9cc5926d602e2f9e84fa72586825486fb180b7e/tensorstore-0.1.74.tar.gz", hash = "sha256:a062875f27283d30ce4959c408c253ecb336fce8e3f9837c064e3d30cda79203", size = 6795605, upload-time = "2025-04-24T15:42:18.829Z" }
 wheels = [
@@ -6869,16 +8238,93 @@ name = "tensorstore"
 version = "0.1.76"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
-    "python_full_version < '3.13' and sys_platform == 'win32'",
-    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version < '3.13' and sys_platform == 'darwin'",
-    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
-]
-dependencies = [
-    { name = "ml-dtypes", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" },
-    { name = "numpy", marker = "python_full_version < '3.13'" },
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+]
+dependencies = [
+    { name = "ml-dtypes", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "numpy", marker = "python_full_version < '3.13' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/ff/ae/947a9f232de7319b664ed8d278e9e0363e9294da73fd422c687ac4eb070e/tensorstore-0.1.76.tar.gz", hash = "sha256:ed0d565e7a038a84b1b5b5d9f7397caec200b53941d8889f44b7f63dd6abffe7", size = 6869230, upload-time = "2025-07-02T21:34:03.773Z" }
 wheels = [
@@ -6935,11 +8381,11 @@ dependencies = [
     { name = "huggingface-hub" },
     { name = "pyyaml" },
     { name = "safetensors" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
-    { name = "torchvision", version = "0.24.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" },
-    { name = "torchvision", version = "0.24.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torchvision", version = "0.24.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torchvision", version = "0.24.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version >= '3.15' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version >= '3.15' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_python_implementation != 'CPython' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_python_implementation != 
'CPython' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_python_implementation != 'CPython' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torchvision", version = "0.24.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torchvision", version = "0.24.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel') or (python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp') or (python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore') or (python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'darwin' and sys_platform != 'linux') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 
'darwin' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/94/f6/4d7a8c261341fa6ad281920618739f2a650f41043afcedb570f24e99a776/timm-1.0.16.tar.gz", hash = "sha256:a3b8130dd2cb8dc3b9f5e3d09ab6d677a6315a8695fd5264eb6d52a4a46c1044", size = 2339999, upload-time = "2025-06-26T17:09:44.208Z" }
 wheels = [
@@ -6989,20 +8435,35 @@ resolution-markers = [
     "python_full_version < '3.13' and sys_platform == 'darwin'",
 ]
 dependencies = [
-    { name = "filelock", marker = "sys_platform == 'darwin'" },
-    { name = "fsspec", marker = "sys_platform == 'darwin'" },
-    { name = "jinja2", marker = "sys_platform == 'darwin'" },
-    { name = "networkx", marker = "sys_platform == 'darwin'" },
-    { name = "setuptools", marker = "sys_platform == 'darwin'" },
-    { name = "sympy", marker = "sys_platform == 'darwin'" },
-    { name = "typing-extensions", marker = "sys_platform == 'darwin'" },
+    { name = "filelock", marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "fsspec", marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "jinja2", marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "networkx", marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "setuptools", marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "sympy", marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "typing-extensions", marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 wheels = [
+    { url = "https://files.pythonhosted.org/packages/d1/d3/3985739f3b8e88675127bf70f82b3a48ae083e39cda56305dbd90398fec0/torch-2.9.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e5f7af1dc4c0a7c4a260c2534f41ddaf209714f7c89145e644c44712fbd6b642", size = 104107898, upload-time = "2025-10-15T15:46:20.883Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/4b/f4bb2e6c25d0272f798cd6d7a04ed315da76cec68c602d87040c7847287f/torch-2.9.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:01cff95ecd9a212ea2f141db28acccdceb6a4c54f64e6c51091146f5e2a772c6", size = 899738273, upload-time = "2025-10-15T15:50:04.188Z" },
+    { url = "https://files.pythonhosted.org/packages/66/11/c1c5ba6691cda6279087c35bd626536e4fd29521fe740abf5008377a9a02/torch-2.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:4582b162f541651f0cb184d3e291c05c2f556c7117c64a9873e2ee158d40062b", size = 109280887, upload-time = "2025-10-15T15:46:26.228Z" },
     { url = "https://files.pythonhosted.org/packages/dd/5f/b85bd8c05312d71de9402bf5868d217c38827cfd09d8f8514e5be128a52b/torch-2.9.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:33f58e9a102a91259af289d50525c30323b5c9ae1d31322b6447c0814da68695", size = 74478983, upload-time = "2025-10-15T15:46:39.406Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/1c/90eb13833cdf4969ea9707586d7b57095c3b6e2b223a7256bf111689bcb8/torch-2.9.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:c30a17fc83eeab346913e237c64b15b5ba6407fff812f6c541e322e19bc9ea0e", size = 104111330, upload-time = "2025-10-15T15:46:35.238Z" },
+    { url = "https://files.pythonhosted.org/packages/0e/21/2254c54b8d523592c25ef4434769aa23e29b1e6bf5f4c0ad9e27bf442927/torch-2.9.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:8f25033b8667b57857dfd01458fbf2a9e6a6df1f8def23aef0dc46292f6aa642", size = 899750243, upload-time = "2025-10-15T15:48:57.459Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/a5/5cb94fa4fd1e78223455c23c200f30f6dc10c6d4a2bcc8f6e7f2a2588370/torch-2.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:d037f1b4ffd25013be4a7bf3651a0a910c68554956c7b2c92ebe87c76475dece", size = 109284513, upload-time = "2025-10-15T15:46:45.061Z" },
     { url = "https://files.pythonhosted.org/packages/66/e8/fc414d8656250ee46120b44836ffbb3266343db424b3e18ca79ebbf69d4f/torch-2.9.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e4e5b5cba837a2a8d1a497ba9a58dae46fa392593eaa13b871c42f71847503a5", size = 74830362, upload-time = "2025-10-15T15:46:48.983Z" },
+    { url = "https://files.pythonhosted.org/packages/ed/5f/9474c98fc5ae0cd04b9466035428cd360e6611a86b8352a0fc2fa504acdc/torch-2.9.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:64693568f5dc4dbd5f880a478b1cea0201cc6b510d91d1bc54fea86ac5d1a637", size = 104144940, upload-time = "2025-10-15T15:47:29.076Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/5a/8e0c1cf57830172c109d4bd6be2708cabeaf550983eee7029291322447a0/torch-2.9.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:f8ed31ddd7d10bfb3fbe0b9fe01b1243577f13d75e6f4a0839a283915ce3791e", size = 899744054, upload-time = "2025-10-15T15:48:29.864Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/28/82c28b30fcb4b7c9cdd995763d18bbb830d6521356712faebbad92ffa61d/torch-2.9.0-cp313-cp313t-win_amd64.whl", hash = "sha256:eff527d4e4846e6f70d2afd8058b73825761203d66576a7e04ea2ecfebcb4ab8", size = 109517546, upload-time = "2025-10-15T15:47:33.395Z" },
     { url = "https://files.pythonhosted.org/packages/ff/c3/a91f96ec74347fa5fd24453fa514bc61c61ecc79196fa760b012a1873d96/torch-2.9.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:f8877779cf56d1ce431a7636703bdb13307f5960bb1af49716d8b179225e0e6a", size = 74480732, upload-time = "2025-10-15T15:47:38.002Z" },
     { url = "https://files.pythonhosted.org/packages/5c/73/9f70af34b334a7e0ef496ceec96b7ec767bd778ea35385ce6f77557534d1/torch-2.9.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:7e614fae699838038d888729f82b687c03413c5989ce2a9481f9a7e7a396e0bb", size = 74433037, upload-time = "2025-10-15T15:47:41.894Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/84/37cf88625901934c97109e583ecc21777d21c6f54cda97a7e5bbad1ee2f2/torch-2.9.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:dfb5b8cd310ba3436c7e14e8b7833ef658cf3045e50d2bdaed23c8fc517065eb", size = 104116482, upload-time = "2025-10-15T15:47:46.266Z" },
+    { url = "https://files.pythonhosted.org/packages/56/8e/ca8b17866943a8d4f4664d402ea84210aa274588b4c5d89918f5caa24eec/torch-2.9.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:b3d29524993a478e46f5d598b249cd824b7ed98d7fba538bd9c4cde6c803948f", size = 899746916, upload-time = "2025-10-15T15:50:40.294Z" },
+    { url = "https://files.pythonhosted.org/packages/43/65/3b17c0fbbdab6501c5b320a52a648628d0d44e7379f64e27d9eef701b6bf/torch-2.9.0-cp314-cp314-win_amd64.whl", hash = "sha256:71c7578984f5ec0eb645eb4816ac8435fcf3e3e2ae1901bcd2f519a9cafb5125", size = 109275151, upload-time = "2025-10-15T15:49:20.715Z" },
     { url = "https://files.pythonhosted.org/packages/83/36/74f8c051f785500396e42f93542422422dfd874a174f21f8d955d36e5d64/torch-2.9.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:71d9309aee457bbe0b164bce2111cd911c4ed4e847e65d5077dbbcd3aba6befc", size = 74823353, upload-time = "2025-10-15T15:49:16.59Z" },
+    { url = "https://files.pythonhosted.org/packages/62/51/dc3b4e2f9ba98ae27238f0153ca098bf9340b2dafcc67fde645d496dfc2a/torch-2.9.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:c08fb654d783899e204a32cca758a7ce8a45b2d78eeb89517cc937088316f78e", size = 104140340, upload-time = "2025-10-15T15:50:19.67Z" },
+    { url = "https://files.pythonhosted.org/packages/c0/8d/b00657f8141ac16af7bb6cda2e67de18499a3263b78d516b9a93fcbc98e3/torch-2.9.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:ec8feb0099b2daa5728fbc7abb0b05730fd97e0f359ff8bda09865aaa7bd7d4b", size = 899731750, upload-time = "2025-10-15T15:49:36.673Z" },
+    { url = "https://files.pythonhosted.org/packages/fc/29/bd361e0cbb2c79ce6450f42643aaf6919956f89923a50571b0ebfe92d142/torch-2.9.0-cp314-cp314t-win_amd64.whl", hash = "sha256:695ba920f234ad4170c9c50e28d56c848432f8f530e6bc7f88fcb15ddf338e75", size = 109503850, upload-time = "2025-10-15T15:50:24.118Z" },
 ]
 
 [[package]]
@@ -7010,64 +8471,374 @@ name = "torch"
 version = "2.9.0+cu129"
 source = { registry = "https://download.pytorch.org/whl/cu129" }
 resolution-markers = [
-    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
-    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
-    "python_full_version >= '3.13' and sys_platform == 'win32'",
-    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version < '3.13' and sys_platform == 'win32'",
-    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'",
+    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+]
+dependencies = [
+    { name = "filelock", marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "fsspec", marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "jinja2", marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "networkx", marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "nvidia-cuda-cupti-cu12", marker = "sys_platform == 'linux' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "nvidia-cuda-nvrtc-cu12", marker = "sys_platform == 'linux' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "nvidia-cuda-runtime-cu12", marker = "sys_platform == 'linux' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "nvidia-cudnn-cu12", marker = "sys_platform == 'linux' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "nvidia-cufft-cu12", marker = "sys_platform == 'linux' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "nvidia-cufile-cu12", marker = "sys_platform == 'linux' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "nvidia-curand-cu12", marker = "sys_platform == 'linux' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "nvidia-cusolver-cu12", marker = "sys_platform == 'linux' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "nvidia-cusparse-cu12", marker = "sys_platform == 'linux' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "nvidia-cusparselt-cu12", marker = "sys_platform == 'linux' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "nvidia-nccl-cu12", marker = "sys_platform == 'linux' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "nvidia-nvshmem-cu12", marker = "sys_platform == 'linux' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "nvidia-nvtx-cu12", marker = "sys_platform == 'linux' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "setuptools", marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "sympy", marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "triton", version = "3.5.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform == 'linux' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "typing-extensions", marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+]
+wheels = [
+    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:106f9619d43edbd7087bc89b5fd1e4d9f491d9ec8ce91e84378a79b0a7c2b586" },
+    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:6a2f119afeefe66eef75484f008b1a240952e45b24899d27d281961e8a395458" },
+    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp312-cp312-win_amd64.whl", hash = "sha256:890c0d395c529f8161b4e6423e5b2f758984721705cc6895aa3c3852da424d87" },
+    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:c32735d662f8a2071838ce92ef63678cb4f8c7661cdfcd3d1b06503b7b001626" },
+    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:6ba57bef11493397c151d755334092290412904e46c85ed86277a03bb24fd13a" },
+    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp313-cp313-win_amd64.whl", hash = "sha256:4f0cbbd568d76039381711fecd993cac4e500542ef64640b6e2a6d4342c2f381" },
+    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:b0c03da4d96576207013ec352636a3911c59830cbb53b93d11a048a4e55ca7c5" },
+    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:656404556df5b9509487d2052e3b8ef2c7c1ae0b29690eab617b3716b220a184" },
+    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp313-cp313t-win_amd64.whl", hash = "sha256:5f0cf760316c5fca00fcc99320d4454b332bf1b8d338c63012802094cc3895e2" },
+    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:1686765f05d11ac1aa33bb16150391182d8fdbd5f73197fd300a0f9d08790dd4" },
+    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:3b1537f9b8e0149607d51424c9bf2422d30ece1cf58acbea2f6a1d33831e9436" },
+    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp314-cp314-win_amd64.whl", hash = "sha256:c2905e9ce6411e21ce3854e9bbcdcb44117ae7695abd6e8ac30f905bf00e31a9" },
+    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:aff4ff013a21d195a9ee2bc0d069cdfe567b262f641e3981a25114a2e392b170" },
+    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:3f7ccb3cf047c33c442622c811a36826f71a7af9c8e19c12d32f941ad5fbfbb5" },
+    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp314-cp314t-win_amd64.whl", hash = "sha256:7e50135c1b75befe05fc27250369621c82934afc9680daf1d4a71b7d1f29acd8" },
+]
+
+[[package]]
+name = "torch-memory-saver"
+version = "0.0.9"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/28/6c/21dfda5d31afb71f52cedff52370acbb8290485b3f0fee6816a15a3d08f1/torch_memory_saver-0.0.9.tar.gz", hash = "sha256:3bbf76391fb16870b1b0df279fc281c8a05ef8f8809400b309b0a8240e8ee5ba", size = 14220, upload-time = "2025-10-18T02:10:18.163Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3a/35/b22df9e730d8444d62445a594421992781c7fad271325d41656d8a32d103/torch_memory_saver-0.0.9-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:0cf26332993649f8ea1b95d7307dfba3a95ee6cee53de84a3e561fb21752b584", size = 488722, upload-time = "2025-10-18T02:10:16.825Z" },
+]
+
+[[package]]
+name = "torchao"
+version = "0.9.0"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
     "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
     "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin'",
+    "python_full_version < '3.13' and sys_platform == 'darwin'",
 ]
-dependencies = [
-    { name = "filelock", marker = "sys_platform != 'darwin'" },
-    { name = "fsspec", marker = "sys_platform != 'darwin'" },
-    { name = "jinja2", marker = "sys_platform != 'darwin'" },
-    { name = "networkx", marker = "sys_platform != 'darwin'" },
-    { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" },
-    { name = "nvidia-cuda-cupti-cu12", marker = "sys_platform == 'linux'" },
-    { name = "nvidia-cuda-nvrtc-cu12", marker = "sys_platform == 'linux'" },
-    { name = "nvidia-cuda-runtime-cu12", marker = "sys_platform == 'linux'" },
-    { name = "nvidia-cudnn-cu12", marker = "sys_platform == 'linux'" },
-    { name = "nvidia-cufft-cu12", marker = "sys_platform == 'linux'" },
-    { name = "nvidia-cufile-cu12", marker = "sys_platform == 'linux'" },
-    { name = "nvidia-curand-cu12", marker = "sys_platform == 'linux'" },
-    { name = "nvidia-cusolver-cu12", marker = "sys_platform == 'linux'" },
-    { name = "nvidia-cusparse-cu12", marker = "sys_platform == 'linux'" },
-    { name = "nvidia-cusparselt-cu12", marker = "sys_platform == 'linux'" },
-    { name = "nvidia-nccl-cu12", marker = "sys_platform == 'linux'" },
-    { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" },
-    { name = "nvidia-nvshmem-cu12", marker = "sys_platform == 'linux'" },
-    { name = "nvidia-nvtx-cu12", marker = "sys_platform == 'linux'" },
-    { name = "setuptools", marker = "sys_platform != 'darwin'" },
-    { name = "sympy", marker = "sys_platform != 'darwin'" },
-    { name = "triton", version = "3.5.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform == 'linux'" },
-    { name = "typing-extensions", marker = "sys_platform != 'darwin'" },
-]
-wheels = [
-    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp312-cp312-manylinux_2_28_aarch64.whl" },
-    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp312-cp312-manylinux_2_28_x86_64.whl" },
-    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp312-cp312-win_amd64.whl" },
-    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp313-cp313-manylinux_2_28_aarch64.whl" },
-    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp313-cp313-manylinux_2_28_x86_64.whl" },
-    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp313-cp313-win_amd64.whl" },
-    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp313-cp313t-manylinux_2_28_aarch64.whl" },
-    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp313-cp313t-manylinux_2_28_x86_64.whl" },
-    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp313-cp313t-win_amd64.whl" },
-    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp314-cp314-manylinux_2_28_aarch64.whl" },
-    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp314-cp314-manylinux_2_28_x86_64.whl" },
-    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp314-cp314-win_amd64.whl" },
-    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp314-cp314t-manylinux_2_28_aarch64.whl" },
-    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp314-cp314t-manylinux_2_28_x86_64.whl" },
-    { url = "https://download.pytorch.org/whl/cu129/torch-2.9.0%2Bcu129-cp314-cp314t-win_amd64.whl" },
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/7d/fe/a24225d30775192a4c5d9cea3ecb95e6adc69d0a8b5ed98eb8e58d362344/torchao-0.9.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bc708910301a9f98344d43f3fe2aa6d5e1fab706d772b6df47ff05087d664145", size = 5652091, upload-time = "2025-02-28T13:54:15.239Z" },
+    { url = "https://files.pythonhosted.org/packages/db/72/01f755514fb61eadc80b974eb4bd4f22f3009b35457773523e3bd497c511/torchao-0.9.0-py3-none-any.whl", hash = "sha256:ea5603c32762f1a9ade1a4dc7b00f5246623b24a28e49e666f614c79a408712a", size = 712541, upload-time = "2025-02-28T13:54:13.671Z" },
 ]
 
 [[package]]
 name = "torchao"
 version = "0.14.1"
 source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and sys_platform == 'darwin' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+]
 wheels = [
     { url = "https://files.pythonhosted.org/packages/91/56/19abb32bbdc55d9fdebf8d6315a8f7d8ae10e387a91c631abd92afe0056b/torchao-0.14.1-cp310-abi3-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:50f68db5e41952e88daa383fc2f358541e617654f388f508d5c7580c3bee9447", size = 7197175, upload-time = "2025-10-24T01:02:59.223Z" },
     { url = "https://files.pythonhosted.org/packages/41/a7/b888635fbb6ae951cffd41e1318966cbed96ec762b4999815ab68269e23f/torchao-0.14.1-py3-none-any.whl", hash = "sha256:c9896e14531817bc2ca6847b3fe71c42592ab80a43628b36668b2d6d6713fb5b", size = 1067611, upload-time = "2025-10-24T01:03:01.357Z" },
@@ -7078,8 +8849,8 @@ name = "torchaudio"
 version = "2.9.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-automodel') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 wheels = [
     { url = "https://files.pythonhosted.org/packages/b7/63/3c0ede3aa3d19a8a6698ddd107fa88660549360b51bf8ce2717cd498d800/torchaudio-2.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ab4cbcccfd873b0fb41fcb39c9869e59ef84bb95b093f6f58e2d05172a7500d2", size = 809116, upload-time = "2025-10-15T15:52:00.911Z" },
@@ -7106,11 +8877,15 @@ wheels = [
 
 [[package]]
 name = "torchcodec"
-version = "0.6.0"
+version = "0.8.0"
 source = { registry = "https://pypi.org/simple" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/a7/d1/3f90561df013f6a015ef19de22726b64073fee405f53d3c4b8255ab05a67/torchcodec-0.6.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:fdef91a17fb1f1a159ce23710324a9a4e6d6a885275de73700f94a9ad562c6b2", size = 1370954, upload-time = "2025-08-07T08:51:15.021Z" },
-    { url = "https://files.pythonhosted.org/packages/97/62/a938334e39101d4304619b90847d8aef7d1c607c6bcf33638f72931ae990/torchcodec-0.6.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:46dab701a2d809e975a8b07d7ee47ed34f1d903511e374c74cfc1de6a5ab0e3f", size = 1374794, upload-time = "2025-08-07T08:51:17.355Z" },
+    { url = "https://files.pythonhosted.org/packages/17/ae/8b1d69e653894243fa66e2fec511cf203107dd146d161c9f095893c13bbc/torchcodec-0.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:af82d1fac3667335e089dc958b5e8eef5458e37d65cb3a94ebf81f45f00f7805", size = 3903714, upload-time = "2025-10-16T14:42:53.127Z" },
+    { url = "https://files.pythonhosted.org/packages/f6/fd/eec92c82545038a90ffd24e3626bb3a85f7d51577b04819c1c753d380a9b/torchcodec-0.8.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:2ec2e874dfb6fbf9bbeb792bea56317529636e78db175f56aad1e4efd6e12502", size = 1898382, upload-time = "2025-10-16T14:43:37.699Z" },
+    { url = "https://files.pythonhosted.org/packages/fe/09/ce7436151a3825f27c00263d722b0cf093609921da6cf24b0fa8133cc415/torchcodec-0.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:318da9af9179d156be0a84296e909d51e4cd758598eaaea08c828790c80bf977", size = 2070488, upload-time = "2025-10-16T14:43:21.803Z" },
+    { url = "https://files.pythonhosted.org/packages/9b/1c/40fd9358e5dd958775b8d0a01c962a022884810f441ac28229ed0e811599/torchcodec-0.8.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:1f3309252d035c888e6ae4518f5aca24f1c38f163124792d8a29a6872bf457f2", size = 3873235, upload-time = "2025-10-16T14:42:54.507Z" },
+    { url = "https://files.pythonhosted.org/packages/27/81/2e8f8657aed983f20f9ce842b19016d4aff05dd608ac0def94e013602814/torchcodec-0.8.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:253cc3c7a17c7be26abfcf2470e8eab3803ff3108f70be060a7efdcb49d917bc", size = 1902114, upload-time = "2025-10-16T14:43:39.112Z" },
+    { url = "https://files.pythonhosted.org/packages/09/1f/b09f028822991241eb1a31931749d034aee2c654d00f1930f4cecce595bc/torchcodec-0.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:c69285cb393c3b36c7bcc4e59e304076ea22b350ff6adca4a2a09b5f3f81f15c", size = 2070381, upload-time = "2025-10-16T14:43:22.942Z" },
 ]
 
 [[package]]
@@ -7119,8 +8894,8 @@ version = "0.11.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "requests" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "urllib3" },
 ]
 wheels = [
@@ -7133,11 +8908,11 @@ version = "0.0.4"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "numpy" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
-    { name = "torchvision", version = "0.24.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" },
-    { name = "torchvision", version = "0.24.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torchvision", version = "0.24.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torchvision", version = "0.24.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version >= '3.15' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version >= '3.15' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_python_implementation != 'CPython' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_python_implementation != 
'CPython' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_python_implementation != 'CPython' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torchvision", version = "0.24.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torchvision", version = "0.24.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel') or (python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp') or (python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore') or (python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'darwin' and sys_platform != 'linux') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 
'darwin' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/6f/36/574c0c46e818533b78b3c09505211162918188325ab4165ef11a3f295755/torchprofile-0.0.4.tar.gz", hash = "sha256:96b6da17d752a06b02977e078aea95614893b31d4117dd5dcd081f30ce65611b", size = 4557, upload-time = "2021-06-22T04:58:03.592Z" }
 wheels = [
@@ -7149,13 +8924,45 @@ name = "torchvision"
 version = "0.24.0"
 source = { registry = "https://download.pytorch.org/whl/cu129" }
 resolution-markers = [
-    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
-    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
-]
-dependencies = [
-    { name = "numpy", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" },
-    { name = "pillow", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" },
+    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+]
+dependencies = [
+    { name = "numpy", marker = "(python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version >= '3.15' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version >= '3.15' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_python_implementation != 'CPython' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_python_implementation != 'CPython' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or 
(platform_python_implementation != 'CPython' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "pillow", marker = "(python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version >= '3.15' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version >= '3.15' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_python_implementation != 'CPython' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_python_implementation != 'CPython' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or 
(platform_python_implementation != 'CPython' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version >= '3.15' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version >= '3.15' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_python_implementation != 'CPython' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_python_implementation != 
'CPython' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_python_implementation != 'CPython' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 wheels = [
     { url = "https://download.pytorch.org/whl/cu129/torchvision-0.24.0-cp312-cp312-manylinux_2_28_aarch64.whl" },
@@ -7174,16 +8981,31 @@ resolution-markers = [
     "python_full_version < '3.13' and sys_platform == 'darwin'",
 ]
 dependencies = [
-    { name = "numpy", marker = "sys_platform == 'darwin'" },
-    { name = "pillow", marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
+    { name = "numpy", marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "pillow", marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 wheels = [
     { url = "https://files.pythonhosted.org/packages/47/ef/81e4e69e02e2c4650b30e8c11c8974f946682a30e0ab7e9803a831beff76/torchvision-0.24.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c61d40bcd2e2451e932902a702ad495ba1ec6f279e90b1e15cef2bb55dc911e2", size = 1891726, upload-time = "2025-10-15T15:51:16.977Z" },
+    { url = "https://files.pythonhosted.org/packages/00/7b/e3809b3302caea9a12c13f3adebe4fef127188438e719fd6c8dc93db1da6/torchvision-0.24.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:b0531d1483fc322d7da0d83be52f0df860a75114ab87dbeeb9de765feaeda843", size = 2419495, upload-time = "2025-10-15T15:51:11.885Z" },
+    { url = "https://files.pythonhosted.org/packages/7e/e6/7324ead6793075a8c75c56abeed1236d1750de16a5613cfe2ddad164a92a/torchvision-0.24.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:26b9dd9c083f8e5f7ac827de6d5b88c615d9c582dc87666770fbdf16887e4c25", size = 8050480, upload-time = "2025-10-15T15:51:24.012Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/ad/3c56fcd2a0d6e8afa80e115b5ade4302232ec99655220a51d05709819523/torchvision-0.24.0-cp312-cp312-win_amd64.whl", hash = "sha256:060b7c50ed4b3fb0316b08e2e31bfd874ec2f63ef5ae02f81e54341ca4e88703", size = 4292225, upload-time = "2025-10-15T15:51:27.699Z" },
     { url = "https://files.pythonhosted.org/packages/4f/b5/b2008e4b77a8d6aada828dd0f6a438d8f94befa23fdd2d62fa0ac6e60113/torchvision-0.24.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:84d79cfc6457310107ce4d712de7a3d388b24484bc9aeded4a76d8f8e3a2813d", size = 1891722, upload-time = "2025-10-15T15:51:28.854Z" },
+    { url = "https://files.pythonhosted.org/packages/8f/02/e2f6b0ff93ca4db5751ac9c5be43f13d5e53d9e9412324f464dca1775027/torchvision-0.24.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:fec12a269cf80f6b0b71471c8d498cd3bdd9d8e892c425bf39fecb604852c3b0", size = 2371478, upload-time = "2025-10-15T15:51:37.842Z" },
+    { url = "https://files.pythonhosted.org/packages/77/85/42e5fc4f716ec7b73cf1f32eeb5c77961be4d4054b26cd6a5ff97f20c966/torchvision-0.24.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:7323a9be5e3da695605753f501cdc87824888c5655d27735cdeaa9986b45884c", size = 8050200, upload-time = "2025-10-15T15:51:46.276Z" },
+    { url = "https://files.pythonhosted.org/packages/93/c2/48cb0b6b26276d2120b1e0dbc877579a748eae02b4091a7522ce54f6d5e1/torchvision-0.24.0-cp313-cp313-win_amd64.whl", hash = "sha256:08cad8b204196e945f0b2d73adee952d433db1c03645851d52b22a45f1015b13", size = 4309939, upload-time = "2025-10-15T15:51:39.002Z" },
     { url = "https://files.pythonhosted.org/packages/7d/d7/3dd10830b047eeb46ae6b465474258d7b4fbb7d8872dca69bd42449f5c82/torchvision-0.24.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:6ab956a6e588623353e0f20d4b03eb1656cb4a3c75ca4dd8b4e32e01bc43271a", size = 2028355, upload-time = "2025-10-15T15:51:22.384Z" },
+    { url = "https://files.pythonhosted.org/packages/f7/cf/2d7e43409089ce7070f5336161f9216d58653ee1cb26bcb5d6c84cc2de36/torchvision-0.24.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:b1b3db80609c32a088554e8e94b4fc31f1033fe5bb4ac0673ec49c3eb03fb4da", size = 2374466, upload-time = "2025-10-15T15:51:35.382Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/30/8f7c328fd7e0a9665da4b6b56b1c627665c18470bfe62f3729ad3eda9aec/torchvision-0.24.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:e6635f100d455c80b43f297df4b8585a76c6a2e114802f6567ddd28d7b5479b0", size = 8217068, upload-time = "2025-10-15T15:51:36.623Z" },
+    { url = "https://files.pythonhosted.org/packages/55/a2/b6f9e40e2904574c80b3bb872c66af20bbd642053e7c8e1b9e99ab396535/torchvision-0.24.0-cp313-cp313t-win_amd64.whl", hash = "sha256:4ce158bbdc3a9086034bced0b5212888bd5b251fee6d08a9eff151d30b4b228a", size = 4273912, upload-time = "2025-10-15T15:51:33.866Z" },
     { url = "https://files.pythonhosted.org/packages/1b/24/790a39645cc8c71bf442d54a76da9bda5caeb2a44c5f7e02498649cd99d4/torchvision-0.24.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4bdfc85a5ed706421555f32cdc5e3ddb6d40bf65ef03a274ce3c176393e2904b", size = 2028335, upload-time = "2025-10-15T15:51:26.252Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/d7/69479a066ea773653e88eda99031e38681e9094046f87cb957af5036db0e/torchvision-0.24.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:73576a9c4a593223fbae85a64e8bbd77049abd1101893ecf3c5e981284fd58b4", size = 2371609, upload-time = "2025-10-15T15:51:29.859Z" },
+    { url = "https://files.pythonhosted.org/packages/46/64/3c7fdb3771ec992b9445a1f7a969466b23ce2cdb14e09303b3db351a0655/torchvision-0.24.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:dd565b1b06666ff399d0801d4d1824fa570c0167a179ca700a5be232527b3c62", size = 8214918, upload-time = "2025-10-15T15:51:41.465Z" },
+    { url = "https://files.pythonhosted.org/packages/58/51/abc416bc34d574ad479af738e413d9ebf93027ee92d0f4ae38f966b818f7/torchvision-0.24.0-cp314-cp314-win_amd64.whl", hash = "sha256:eb45d12ac48d757738788fd3fb8e88e647d6b2ab2424134ca87556efc72d81b5", size = 4257776, upload-time = "2025-10-15T15:51:42.642Z" },
     { url = "https://files.pythonhosted.org/packages/08/f7/261d1353c611820541ecd43046b89da3f1ae998dc786e4288b890a009883/torchvision-0.24.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:68120e7e03c31900e499a10bb7fdd63cfd67f0054c9fa108e7e27f9cd372f315", size = 2028359, upload-time = "2025-10-15T15:51:32.119Z" },
+    { url = "https://files.pythonhosted.org/packages/a2/fd/615d8a86db1578345de7fa1edaf476fbcf4f057bf7e4fd898306b620c487/torchvision-0.24.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:64e54494043eecf9f57a9881c6fdea49c62282782e737c002ae8b1639e6ea80e", size = 2374469, upload-time = "2025-10-15T15:51:40.19Z" },
+    { url = "https://files.pythonhosted.org/packages/04/98/bac11e8fdbf00d6c398246ff2781370aa72c99f2ac685c01ce79354c9a32/torchvision-0.24.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:75ef9546323b321a451239d886f0cb528f7e98bb294da47a3200effd4e572064", size = 8217060, upload-time = "2025-10-15T15:51:45.033Z" },
+    { url = "https://files.pythonhosted.org/packages/47/6f/9fba8abc468c904570699eceeb51588f9622172b8fffa4ab11bcf15598c2/torchvision-0.24.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2efb617667950814fc8bb9437e5893861b3616e214285be33cbc364a3f42c599", size = 4358490, upload-time = "2025-10-15T15:51:43.884Z" },
 ]
 
 [[package]]
@@ -7193,17 +9015,15 @@ source = { registry = "https://download.pytorch.org/whl/cu129" }
 resolution-markers = [
     "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
     "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
-    "python_full_version >= '3.13' and sys_platform == 'win32'",
-    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version < '3.13' and sys_platform == 'win32'",
-    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux'",
 ]
 dependencies = [
-    { name = "numpy", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
-    { name = "pillow", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+    { name = "numpy", marker = "(python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel') or (python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp') or (python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore') or (python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'darwin' and sys_platform != 'linux') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "pillow", marker = "(python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel') or (python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp') or (python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore') or (python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'darwin' and sys_platform != 'linux') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel') or (python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp') or (python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore') or (python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'darwin' and sys_platform != 'linux') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' 
and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 wheels = [
     { url = "https://download.pytorch.org/whl/cu129/torchvision-0.24.0%2Bcu129-cp312-cp312-manylinux_2_28_x86_64.whl" },
@@ -7218,13 +9038,22 @@ name = "tqdm"
 version = "4.67.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "colorama", marker = "sys_platform == 'win32'" },
+    { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737, upload-time = "2024-11-24T20:12:22.481Z" }
 wheels = [
     { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" },
 ]
 
+[[package]]
+name = "traitlets"
+version = "5.14.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/eb/79/72064e6a701c2183016abbbfedaba506d81e30e232a68c9f0d6f6fcd1574/traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7", size = 161621, upload-time = "2024-04-19T11:11:49.746Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359, upload-time = "2024-04-19T11:11:46.763Z" },
+]
+
 [[package]]
 name = "transformer-engine"
 version = "2.8.0"
@@ -7263,8 +9092,8 @@ dependencies = [
     { name = "einops" },
     { name = "onnx" },
     { name = "onnxscript" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin' or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/38/63/1e3953244ed4f318f87889309a56cdd664759f007967eb850ee415a5584d/transformer_engine_torch-2.8.0.tar.gz", hash = "sha256:ce09f1bd9b8e532a5c347b9e9b3a3a771722095daddca673ae82ccce8e68d759", size = 209805, upload-time = "2025-10-07T04:54:11.134Z" }
 
@@ -7291,41 +9120,140 @@ wheels = [
 
 [[package]]
 name = "triton"
-version = "3.4.0"
-source = { registry = "https://pypi.org/simple" }
+version = "3.5.0"
+source = { registry = "https://download.pytorch.org/whl/cu129" }
 resolution-markers = [
-    "python_full_version >= '3.13' and sys_platform == 'win32'",
-    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version < '3.13' and sys_platform == 'win32'",
-    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'",
-]
-dependencies = [
-    { name = "setuptools", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" },
+    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-7-nemo-rl-automodel' and extra != 'extra-7-nemo-rl-fsdp' and extra != 'extra-7-nemo-rl-mcore' and extra != 'extra-7-nemo-rl-sglang' and extra != 'extra-7-nemo-rl-vllm'",
+]
+wheels = [
+    { url = "https://download.pytorch.org/whl/triton-3.5.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9b82b46df35ae9e0b85a382d99a076e6ebea23a3a5dbaca7dc24a7571e6bebad" },
+    { url = "https://download.pytorch.org/whl/triton-3.5.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:76f8651a5e38c2a7da6fa2b2e41cbc00a5a32cb52bf3f520113fe90b723a310d" },
+    { url = "https://download.pytorch.org/whl/triton-3.5.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:05e145b51a53573bff260431ff40fadce0838ad9928c5ee1883b53d59884e198" },
+    { url = "https://download.pytorch.org/whl/triton-3.5.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b6f6db89501a6dc4a492ff281460c1b15563420bc90934770aa6a7b80fd51c95" },
+    { url = "https://download.pytorch.org/whl/triton-3.5.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8743eeb3f383ad3a33d508d13cc368abaa5bc6c06f61e80503aa7e004f49e24d" },
+    { url = "https://download.pytorch.org/whl/triton-3.5.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:581a43b2da8048db6fb73dbe8a2fe6c922f2c577ee65d23b3b76ff616737a7bc" },
+    { url = "https://download.pytorch.org/whl/triton-3.5.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e1a856d1d731734ee9ed62dde548f342d34c988b8d7e235bf2037e428de2258" },
+    { url = "https://download.pytorch.org/whl/triton-3.5.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8d98b78a83e910256ec703486b8c275ec53975e5ac4a8cb7ce07c696e08f6b5a" },
+    { url = "https://download.pytorch.org/whl/triton-3.5.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a97fdbcf6b71d73f4c04f5819ca12786ee40e4a83144bebae2616d7c0942182" },
+    { url = "https://download.pytorch.org/whl/triton-3.5.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ff47e20dbefdaaa2c6968bdcc69633871bda425f2801c67bc8d8df472f1d12d4" },
 ]
 
 [[package]]
 name = "triton"
 version = "3.5.0"
-source = { registry = "https://download.pytorch.org/whl/cu129" }
+source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
-    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
-    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
-    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux'",
 ]
 wheels = [
-    { url = "https://download.pytorch.org/whl/triton-3.5.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl" },
-    { url = "https://download.pytorch.org/whl/triton-3.5.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl" },
-    { url = "https://download.pytorch.org/whl/triton-3.5.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl" },
-    { url = "https://download.pytorch.org/whl/triton-3.5.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl" },
-    { url = "https://download.pytorch.org/whl/triton-3.5.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl" },
-    { url = "https://download.pytorch.org/whl/triton-3.5.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl" },
-    { url = "https://download.pytorch.org/whl/triton-3.5.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl" },
-    { url = "https://download.pytorch.org/whl/triton-3.5.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl" },
-    { url = "https://download.pytorch.org/whl/triton-3.5.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl" },
-    { url = "https://download.pytorch.org/whl/triton-3.5.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl" },
+    { url = "https://files.pythonhosted.org/packages/87/9b/30988039e1e84df7554fba24e6a734d2d0e847af33cabdf9b532b3c51456/triton-3.5.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7da21fccceafc163e3a5e857abe34351ef76345af06cabf9637a914742671f0b", size = 159946647, upload-time = "2025-10-15T19:15:56.325Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/3a/e991574f3102147b642e49637e0281e9bb7c4ba254edb2bab78247c85e01/triton-3.5.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9e71db82261c4ffa3921cd050cd5faa18322d2d405c30eb56084afaff3b0833", size = 170476535, upload-time = "2025-10-13T16:38:05.18Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/85/e37f1197acb04c8f3d83851d23d5d6ed5060ef74580668b112e23fdfa203/triton-3.5.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:188da5b81fa2f8322c27fec1627703eac24cb9bb7ab0dfbe9925973bc1b070d3", size = 159958970, upload-time = "2025-10-15T19:16:01.717Z" },
+    { url = "https://files.pythonhosted.org/packages/6c/29/10728de8a6e932e517c10773486b8e99f85d1b1d9dd87d9a9616e1fef4a1/triton-3.5.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e6bb9aa5519c084a333acdba443789e50012a4b851cd486c54f0b8dc2a8d3a12", size = 170487289, upload-time = "2025-10-13T16:38:11.662Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/1d/38258f05010ac17a7b058c022911c9cae6526e149b7397134a048cf5a6c2/triton-3.5.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:03127d9b33aaf979c856676b394bc059ec1d68cb6da68ae03f62dd8ad77a04ae", size = 160073012, upload-time = "2025-10-15T19:16:07.477Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/38/db80e48b9220c9bce872b0f616ad0446cdf554a40b85c7865cbca99ab3c2/triton-3.5.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c83f2343e1a220a716c7b3ab9fccfcbe3ad4020d189549200e2d2e8d5868bed9", size = 170577179, upload-time = "2025-10-13T16:38:17.865Z" },
+    { url = "https://files.pythonhosted.org/packages/91/fe/8f5771d00227f4eb1ee034f218ed427102b989366d2275fe3b3c105a3921/triton-3.5.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:468936651d383f4a6d10068d34a627505e13af55be5d002b9f27b987e7a5f0ac", size = 159957460, upload-time = "2025-10-15T19:16:12.626Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/60/1810655d1d856c9a4fcc90ee8966d85f552d98c53a6589f95ab2cbe27bb8/triton-3.5.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da0fa67ccd76c3dcfb0bffe1b1c57c685136a6bd33d141c24d9655d4185b1289", size = 170487949, upload-time = "2025-10-13T16:38:24.881Z" },
+    { url = "https://files.pythonhosted.org/packages/78/59/99edd103958fe6e42b50b9ad8ce4f223ddf4ccf475259cf7d2b53381dc6c/triton-3.5.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c7ceef21410229ac23173a28eee5cfc0e37c1dfdb8b4bc11ecda2e3ecec7c686", size = 160075629, upload-time = "2025-10-15T19:16:18.746Z" },
+    { url = "https://files.pythonhosted.org/packages/fb/b7/1dec8433ac604c061173d0589d99217fe7bf90a70bdc375e745d044b8aad/triton-3.5.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:317fe477ea8fd4524a6a8c499fb0a36984a56d0b75bf9c9cb6133a1c56d5a6e7", size = 170580176, upload-time = "2025-10-13T16:38:31.14Z" },
 ]
 
 [[package]]
@@ -7494,14 +9422,14 @@ dependencies = [
     { name = "depyf" },
     { name = "diskcache" },
     { name = "einops" },
-    { name = "fastapi", extra = ["standard"] },
+    { name = "fastapi", extra = ["standard"], marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-fsdp' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-vllm'" },
     { name = "filelock" },
-    { name = "flashinfer-python" },
+    { name = "flashinfer-python", version = "0.5.2", source = { registry = "https://pypi.org/simple" } },
     { name = "gguf" },
     { name = "lark" },
-    { name = "llguidance", marker = "platform_machine == 'aarch64' or platform_machine == 'arm64' or platform_machine == 's390x' or platform_machine == 'x86_64'" },
+    { name = "llguidance" },
     { name = "lm-format-enforcer" },
-    { name = "mistral-common", extra = ["image"] },
+    { name = "mistral-common", extra = ["image"], marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-fsdp' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-vllm'" },
     { name = "model-hosting-container-standards" },
     { name = "msgspec" },
     { name = "ninja" },
@@ -7510,7 +9438,7 @@ dependencies = [
     { name = "openai" },
     { name = "openai-harmony" },
     { name = "opencv-python-headless" },
-    { name = "outlines-core" },
+    { name = "outlines-core", version = "0.2.11", source = { registry = "https://pypi.org/simple" } },
     { name = "partial-json-parser" },
     { name = "pillow" },
     { name = "prometheus-client" },
@@ -7523,7 +9451,7 @@ dependencies = [
     { name = "python-json-logger" },
     { name = "pyyaml" },
     { name = "pyzmq" },
-    { name = "ray", extra = ["cgraph"] },
+    { name = "ray", extra = ["cgraph"], marker = "extra == 'extra-7-nemo-rl-automodel' or extra == 'extra-7-nemo-rl-fsdp' or extra == 'extra-7-nemo-rl-mcore' or extra == 'extra-7-nemo-rl-vllm'" },
     { name = "regex" },
     { name = "requests" },
     { name = "scipy" },
@@ -7533,18 +9461,18 @@ dependencies = [
     { name = "six" },
     { name = "tiktoken" },
     { name = "tokenizers" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-automodel') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "torchaudio" },
-    { name = "torchvision", version = "0.24.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" },
-    { name = "torchvision", version = "0.24.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torchvision", version = "0.24.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+    { name = "torchvision", version = "0.24.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel') or (python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp') or (python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore') or (python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version >= '3.15' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version >= '3.15' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version >= '3.15' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version >= '3.15' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_machine != 'aarch64' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_python_implementation != 'CPython' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (platform_python_implementation != 'CPython' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or 
(platform_python_implementation != 'CPython' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (platform_python_implementation != 'CPython' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform != 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torchvision", version = "0.24.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torchvision", version = "0.24.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel') or (python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp') or (python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore') or (python_full_version >= '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-vllm') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (python_full_version < '3.15' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm') or (platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel') or (platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp') or (platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore') or (platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-vllm') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-automodel') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-fsdp') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-mcore') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform != 'darwin' and sys_platform != 'linux' 
and extra == 'extra-7-nemo-rl-automodel') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-7-nemo-rl-vllm') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "tqdm" },
     { name = "transformers" },
     { name = "typing-extensions" },
     { name = "watchfiles" },
     { name = "xformers", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
-    { name = "xgrammar", marker = "platform_machine == 'aarch64' or platform_machine == 'arm64' or platform_machine == 's390x' or platform_machine == 'x86_64'" },
+    { name = "xgrammar", version = "0.1.25", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine == 'aarch64' or platform_machine == 'arm64' or platform_machine == 's390x' or platform_machine == 'x86_64'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/40/15/bc50794c5c6a48f075d72fde8035647d38072ad81031168d27ca631f9395/vllm-0.11.2.tar.gz", hash = "sha256:496d15bb64ca0fe73adbc57a93b29f4671fa12404c09e0ba02f777bfe60af671", size = 17287801, upload-time = "2025-11-20T08:31:35.084Z" }
 wheels = [
@@ -7743,11 +9671,14 @@ sdist = { url = "https://files.pythonhosted.org/packages/47/6a/62e288da7bcda82b9
 
 [[package]]
 name = "wheel"
-version = "0.45.1"
+version = "0.46.3"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/8a/98/2d9906746cdc6a6ef809ae6338005b3f21bb568bea3165cfc6a243fdc25c/wheel-0.45.1.tar.gz", hash = "sha256:661e1abd9198507b1409a20c02106d9670b2576e916d58f520316666abca6729", size = 107545, upload-time = "2024-11-23T00:18:23.513Z" }
+dependencies = [
+    { name = "packaging" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/89/24/a2eb353a6edac9a0303977c4cb048134959dd2a51b48a269dfc9dde00c8a/wheel-0.46.3.tar.gz", hash = "sha256:e3e79874b07d776c40bd6033f8ddf76a7dad46a7b8aa1b2787a83083519a1803", size = 60605, upload-time = "2026-01-22T12:39:49.136Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/0b/2c/87f3254fd8ffd29e4c02732eee68a83a1d3c346ae39bc6822dcbcb697f2b/wheel-0.45.1-py3-none-any.whl", hash = "sha256:708e7481cc80179af0e556bbf0cc00b8444c7321e2700b8d8580231d13017248", size = 72494, upload-time = "2024-11-23T00:18:21.207Z" },
+    { url = "https://files.pythonhosted.org/packages/87/22/b76d483683216dde3d67cba61fb2444be8d5be289bf628c13fc0fd90e5f9/wheel-0.46.3-py3-none-any.whl", hash = "sha256:4b399d56c9d9338230118d705d9737a2a468ccca63d5e813e2a4fc7815d8bc4d", size = 30557, upload-time = "2026-01-22T12:39:48.099Z" },
 ]
 
 [[package]]
@@ -7858,19 +9789,32 @@ dependencies = [
 sdist = { url = "https://files.pythonhosted.org/packages/6f/c1/cd0d6b89da38d8aa174e8eabf29530f8871daf53b886ec6b680ef9d3e71f/xformers-0.0.33.post1.tar.gz", hash = "sha256:e555258249b514ba117b3403523fe0bd7d3e92e930575f0e0dbf5f7db5b42677", size = 14784437, upload-time = "2025-11-13T20:16:14.793Z" }
 wheels = [
     { url = "https://files.pythonhosted.org/packages/39/94/3ad80d1070ddfb280c20a67dfbc094a93579a02910ef41f20631a9b566fe/xformers-0.0.33.post1-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a8d72c6272453450eede2ed9aaa14448e6525569e14217573057ded146090db3", size = 122884756, upload-time = "2025-11-13T20:16:04.002Z" },
+    { url = "https://files.pythonhosted.org/packages/a9/ef/4f59589fe37e206f5bb6158aa1294cfa0e79d52bca99ea0fd3f5c8a73404/xformers-0.0.33.post1-cp39-abi3-win_amd64.whl", hash = "sha256:e20729ca1647d53f86143bd57451af953bb78e72677548c972cd016238a066e3", size = 105088581, upload-time = "2025-11-13T20:16:11.221Z" },
 ]
 
 [[package]]
 name = "xgrammar"
 version = "0.1.25"
 source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux')",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux'",
+    "python_full_version >= '3.13' and python_full_version < '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin'",
+    "python_full_version < '3.13' and sys_platform == 'darwin'",
+]
 dependencies = [
     { name = "mlx-lm", marker = "platform_machine == 'arm64' and sys_platform == 'darwin'" },
     { name = "ninja" },
     { name = "numpy" },
     { name = "pydantic" },
-    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" },
-    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-automodel') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-automodel') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-fsdp') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-mcore') or (sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-vllm') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
     { name = "transformers" },
     { name = "triton", version = "3.5.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
     { name = "typing-extensions" },
@@ -7887,6 +9831,43 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/5f/82/e48284c5061550ff682b1096c43146244207c64541cf36fcce88c66a0407/xgrammar-0.1.25-cp313-cp313-win_amd64.whl", hash = "sha256:ffadeba0b704667a7eb6202d409533e9d1e80af15a10add107684e0cde45b8e4", size = 698260, upload-time = "2025-09-21T05:58:49.44Z" },
 ]
 
+[[package]]
+name = "xgrammar"
+version = "0.1.27"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+    "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+    "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
+    "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'",
+    "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux'",
+    "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux'",
+    "python_full_version >= '3.13' and sys_platform == 'darwin'",
+    "python_full_version < '3.13' and sys_platform == 'darwin'",
+]
+dependencies = [
+    { name = "mlx-lm", marker = "platform_machine == 'arm64' and sys_platform == 'darwin'" },
+    { name = "ninja" },
+    { name = "numpy" },
+    { name = "pydantic" },
+    { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "torch", version = "2.9.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(sys_platform != 'darwin' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-automodel' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-fsdp' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-mcore' and extra == 'extra-7-nemo-rl-sglang') or (extra == 'extra-7-nemo-rl-sglang' and extra == 'extra-7-nemo-rl-vllm')" },
+    { name = "transformers" },
+    { name = "triton", version = "3.5.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/62/e1/b522b1e50fddd773d368c2945ef5ed628aa90c0c972027f9aa5a51d6d4f9/xgrammar-0.1.27.tar.gz", hash = "sha256:40af7bb2891f1633ec7f660723c74a92a963307d283aca9e3b4e53a0feaf1d46", size = 2303435, upload-time = "2025-11-04T03:11:53.512Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/39/b6/09b43e2adff45d30ebcf9110d0ff753f4c96b368adaa2d166df3dee88d5f/xgrammar-0.1.27-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:6404a7714440eb86ab0379d749f33591274eeef04787dc00d61f22069f3ed51d", size = 663319, upload-time = "2025-11-04T03:11:28.682Z" },
+    { url = "https://files.pythonhosted.org/packages/88/8b/53eb5c6d0df8df9f6350f182516a5b8c7b8b11d62650300d2c04af2bc4ea/xgrammar-0.1.27-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d01fa9894bc44a7f6a70b0301b59f3e310c0e0e7b7ea4cf5ce190b12d8220dd8", size = 636168, upload-time = "2025-11-04T03:11:30.373Z" },
+    { url = "https://files.pythonhosted.org/packages/08/1b/53d30395bb973f13255d3e3a72961f95fdfb4083877c3f93bb626e3d1522/xgrammar-0.1.27-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:906c0601bac9170e1bab77ca985259035ff9c386c347efcb191555eab86e984e", size = 8676340, upload-time = "2025-11-04T03:11:32.203Z" },
+    { url = "https://files.pythonhosted.org/packages/48/74/70cfac0171d9f309cfe18c5384330e3edc9466c436b258495fd30ecf29a3/xgrammar-0.1.27-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb68988a122f544301c496f2cac8ee82960ca7f5b3a42a952b2a00c0a55e6ca5", size = 8870650, upload-time = "2025-11-04T03:11:34.322Z" },
+    { url = "https://files.pythonhosted.org/packages/6a/a1/0392aa9c7669c56f7f88e4423b246476a74a72c3bb9db944e1bfc029985e/xgrammar-0.1.27-cp312-cp312-win_amd64.whl", hash = "sha256:3aac335ea052afc8f8dc34b9f2afcb9462a68189423aed9f60b0941db6cfc310", size = 708811, upload-time = "2025-11-04T03:11:36.214Z" },
+    { url = "https://files.pythonhosted.org/packages/a4/77/5aee819c00844fb333fa802507182aa19445b347840103a14bd27ed944c4/xgrammar-0.1.27-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e248488c7c8a8ba175c7d1c5b55a2dd705661bbaa87755a749f9fdda146cbe1e", size = 636084, upload-time = "2025-11-04T03:11:38.192Z" },
+    { url = "https://files.pythonhosted.org/packages/23/c2/cd15c44bd6db4411fc733303e0b85033772f3389b32210e6f0ae08f5a2c1/xgrammar-0.1.27-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ac7a307d7a739962c422969cb486aa3994e200bfa6191d9519fdca5224760f0", size = 8870005, upload-time = "2025-11-04T03:11:40.039Z" },
+    { url = "https://files.pythonhosted.org/packages/be/45/d3d3dc97c05159d9336fb4b947b22bd074ca259bd291be523c00e5696d24/xgrammar-0.1.27-cp313-cp313-win_amd64.whl", hash = "sha256:37936e04974bcb4c02a69ab734ff530669a43b03b2910c4013233dd074896ac9", size = 708726, upload-time = "2025-11-04T03:11:42.064Z" },
+]
+
 [[package]]
 name = "xxhash"
 version = "3.5.0"