From f0eb8d0c661ac83eaaaffd1529b02be197e2aa51 Mon Sep 17 00:00:00 2001
From: George Armstrong <georgea@nvidia.com>
Date: Thu, 12 Feb 2026 16:38:35 -0800
Subject: [PATCH 01/25] Add nemo-skills-core subpackage for lightweight
 installs

Signed-off-by: George Armstrong <georgea@nvidia.com>
---
 .github/workflows/tests.yml                  |   2 +
 CONTRIBUTING.md                              |  43 ++++++
 core/pyproject.toml                          |  53 +++++++
 core/requirements.txt                        |  27 ++++
 docs/basics/installation.md                  |  74 ++++++++++
 mkdocs.yml                                   |   1 +
 nemo_skills/_cli_stub.py                     |  20 +++
 nemo_skills/dataset/utils.py                 | 119 +++++++--------
 nemo_skills/evaluation/evaluator/__init__.py | 146 ++++++++++---------
 nemo_skills/pipeline/__init__.py             |  10 ++
 nemo_skills/pipeline/dataset.py              | 114 +++++++++++++++
 pyproject.toml                               |   9 +-
 requirements/pipeline.txt                    |   8 +
 13 files changed, 491 insertions(+), 135 deletions(-)
 create mode 100644 core/pyproject.toml
 create mode 100644 core/requirements.txt
 create mode 100644 docs/basics/installation.md
 create mode 100644 nemo_skills/_cli_stub.py
 create mode 100644 nemo_skills/pipeline/dataset.py
 create mode 100644 requirements/pipeline.txt

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 225c5f9b00..54f99bf293 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -44,6 +44,8 @@ jobs:
       with:
         python-version: "3.10"
         cache: pip
+    - name: Install uv
+      uses: astral-sh/setup-uv@v4
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 4dba695e4c..f35b6cc45a 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -49,6 +49,49 @@ The following things are required when adding new benchmarks
   the dataset into slurm tests. This is the most comprehensive test we can do by running full
   evaluation on cluster with arbitrary model and check that results are as expected.
 
+### Respect the Core / Pipeline dependency boundary
+
+NeMo Skills is split into **Core** (agent runtime) and **Pipeline** (orchestration). The rule is simple:
+
+```
+Pipeline can import from Core.
+Core CANNOT import from Pipeline.
+```
+
+Core modules are everything under `nemo_skills/` **except** `nemo_skills/pipeline/`. They must never have top-level imports from `nemo_skills.pipeline` or `nemo_run`. This boundary is enforced by `tests/test_dependency_isolation.py` which verifies that core modules import successfully without pipeline dependencies installed.
+
+**When adding a new dependency**, put it in the right requirements file:
+
+| If the dependency is needed for... | Add it to |
+|---|---|
+| Inference, evaluation, math/code grading, MCP clients, prompt formatting | `core/requirements.txt` |
+| CLI commands, cluster orchestration, experiment tracking | `requirements/pipeline.txt` |
+| Everything else (dataset-specific deps, benchmark-specific packages) | `requirements/main.txt` only |
+
+Dependencies in `core/requirements.txt` should be things that a typical `GenerationTask` run with PythonTool would need. Dataset-specific or benchmark-specific packages (e.g., `faiss-cpu`, `sacrebleu`, `func-timeout`) go only in `requirements/main.txt`.
+
+All core and pipeline deps must also appear in `requirements/main.txt` (the monolithic file used for default installs).
+
+**Examples of correct placement:**
+
+- `httpx` -> `core/requirements.txt` (used by model inference clients)
+- `sympy` -> `core/requirements.txt` (used by math graders)
+- `nemo_run` -> `requirements/pipeline.txt` (cluster job orchestration)
+- `wandb` -> `requirements/pipeline.txt` (experiment tracking for cluster jobs)
+- `faiss-cpu` -> `requirements/main.txt` only (only needed for BFCL benchmark)
+
+**Examples of mistakes to avoid:**
+
+- Adding `nemo_run` to `core/requirements.txt` -- it is a pipeline/orchestration dependency, core must not depend on it.
+- Adding `typer` to `core/requirements.txt` -- it is the CLI framework, only used by the pipeline layer.
+- Adding a new dependency only to a subpackage file but forgetting `requirements/main.txt` -- the default install would be missing it.
+
+**When writing new core code:**
+
+- If you need something from `nemo_skills.pipeline`, your code probably belongs in pipeline, not core. Move it.
+- If you have a function that works locally but *also* needs a cluster variant, put the local version in core and a cluster-aware wrapper in `nemo_skills/pipeline/` (see `pipeline/dataset.py` for the pattern).
+- If you absolutely must reference pipeline code from core for backwards compatibility, use a lazy import inside a function body with a `DeprecationWarning` (see `dataset/utils.py:get_dataset_module` for the pattern). Never add a top-level import.
+
 ### Keep the code elegant
 When adding new features, try to keep the code simple and elegant.
 - Can you reuse / extend an existing functionality?
diff --git a/core/pyproject.toml b/core/pyproject.toml
new file mode 100644
index 0000000000..c5ccb3bd1f
--- /dev/null
+++ b/core/pyproject.toml
@@ -0,0 +1,53 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+[build-system]
+requires = [
+    "setuptools",
+    "wheel"
+]
+build-backend = "setuptools.build_meta"
+
+[project]
+dynamic = ["version", "dependencies"]
+
+name = "nemo-skills-core"
+description = "NeMo Skills core runtime -- inference, evaluation, and tool calling"
+readme = {text = "NeMo Skills core runtime for inference, evaluation, and tool calling. See https://nvidia-nemo.github.io/Skills for full documentation.", content-type = "text/plain"}
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "License :: OSI Approved :: Apache Software License",
+    "Operating System :: OS Independent",
+]
+requires-python = ">=3.10"
+
+[project.urls]
+homepage = "https://nvidia-nemo.github.io/Skills"
+source = "https://github.com/NVIDIA-NeMo/Skills"
+issues = "https://github.com/NVIDIA-NeMo/Skills/issues"
+
+[project.scripts]
+ns = "nemo_skills._cli_stub:main"
+
+[tool.setuptools]
+include-package-data = true
+
+[tool.setuptools.packages.find]
+where = [".."]
+exclude = ["tests", "tests.*", "core", "core.*"]
+
+[tool.setuptools.dynamic]
+version = { attr = "nemo_skills.version.__version__" }
+dependencies = {file = ["requirements.txt"]}
diff --git a/core/requirements.txt b/core/requirements.txt
new file mode 100644
index 0000000000..696507a4bf
--- /dev/null
+++ b/core/requirements.txt
@@ -0,0 +1,27 @@
+# Core dependencies for agent runtime (inference, evaluation, tool calling, math/code grading).
+# No cluster orchestration deps (nemo_run, typer, etc.)
+
+
+# --- code evaluation ---
+compute-eval @ git+https://github.com/NVIDIA/compute-eval.git@2d14770
+evalplus @ git+https://github.com/evalplus/evalplus@c91370f
+fire
+flask
+# --- agent runtime ---
+httpx
+huggingface_hub
+hydra-core
+ipython
+litellm[caching]
+
+# --- math evaluation ---
+math-verify[antlr4_9_3]
+mcp
+numpy
+openai
+pyyaml
+requests
+rich
+sympy
+tqdm
+transformers
diff --git a/docs/basics/installation.md b/docs/basics/installation.md
new file mode 100644
index 0000000000..6d20e93684
--- /dev/null
+++ b/docs/basics/installation.md
@@ -0,0 +1,74 @@
+# Installation & Dependency Groups
+
+NeMo Skills provides two installable packages:
+
+- **`nemo-skills`** (root) -- full install with CLI, cluster orchestration, all benchmarks
+- **`nemo-skills-core`** (`core/` subdirectory) -- lightweight runtime only
+
+## Default installation
+
+`pip install nemo-skills` gives you **everything** (inference, evaluation, CLI,
+cluster orchestration, benchmarks):
+
+```bash
+pip install git+https://github.com/NVIDIA-NeMo/Skills.git
+# or, from a local clone:
+pip install -e .
+```
+
+## Lightweight installation
+
+If you only need inference, evaluation, and tool calling (no cluster orchestration):
+
+```bash
+pip install "nemo-skills-core @ git+https://github.com/NVIDIA-NeMo/Skills.git#subdirectory=core"
+# or, from a local clone:
+pip install -e core/
+```
+
+## Extras (dependency groups)
+
+| Extra | Requirements file | What it provides |
+|-------|-------------------|------------------|
+| `core` | `core/requirements.txt` | Agent runtime: inference, evaluation, tool calling (MCP), prompt formatting, math/code grading. No cluster orchestration. |
+| `pipeline` | `requirements/pipeline.txt` | CLI (`ns` command), cluster management, experiment tracking (`nemo_run`, `typer`, `wandb`). |
+| `dev` | `requirements/common-tests.txt`, `requirements/common-dev.txt` | Development and testing tools (`pytest`, `ruff`, `pre-commit`). |
+
+### Examples
+
+```bash
+# Full install (default)
+pip install -e .
+
+# Core only -- lightweight runtime for downstream integrations
+pip install -e core/
+
+# Development (everything + dev tools)
+pip install -e ".[dev]"
+```
+
+## Core / Pipeline architecture boundary
+
+The codebase enforces a one-way dependency rule: pipeline modules can import
+from core, but core modules must not import from pipeline.
+
+This boundary is enforced by `tests/test_dependency_isolation.py` which creates
+fresh virtualenvs and verifies that core modules import successfully without
+pipeline dependencies installed.
+
+### Dataset loading example
+
+The boundary shows up concretely in dataset loading:
+
+```python
+# Core: local-only dataset loading (no cluster deps)
+from nemo_skills.dataset.utils import get_dataset_module
+module, path, on_cluster = get_dataset_module("gsm8k")
+
+# Pipeline: cluster-aware dataset loading (SSH tunnels, mount resolution)
+from nemo_skills.pipeline.dataset import get_dataset_module
+module, path, on_cluster = get_dataset_module("gsm8k", cluster_config=cfg)
+```
+
+If you pass `cluster_config` to the core version, it will emit a
+`DeprecationWarning` and redirect to the pipeline version.
diff --git a/mkdocs.yml b/mkdocs.yml
index 81489a3e84..86ef9fcd99 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -56,6 +56,7 @@ nav:
   - Nemo-Skills: index.md
   - Getting started:
     - basics/index.md
+    - Installation & Dependencies: basics/installation.md
     - Cluster configs: basics/cluster-configs.md
     - Code packaging: basics/code-packaging.md
     - Prompt format: basics/prompt-format.md
diff --git a/nemo_skills/_cli_stub.py b/nemo_skills/_cli_stub.py
new file mode 100644
index 0000000000..833b3ff34e
--- /dev/null
+++ b/nemo_skills/_cli_stub.py
@@ -0,0 +1,20 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+
+
+def main():
+    print("nemo-skills-core is installed (lightweight mode).\nFor the full ns CLI, run: pip install nemo-skills")
+    sys.exit(1)
diff --git a/nemo_skills/dataset/utils.py b/nemo_skills/dataset/utils.py
index 0b59a2b7db..d3037b6455 100644
--- a/nemo_skills/dataset/utils.py
+++ b/nemo_skills/dataset/utils.py
@@ -17,16 +17,15 @@
 import json
 import os
 import sys
-import tempfile
 import time
 import urllib.request
+import warnings
 from enum import Enum
 from pathlib import Path
 from typing import Dict
 from urllib.error import URLError
 
 from nemo_skills.evaluation.math_grader import extract_answer
-from nemo_skills.pipeline.utils import cluster_download_file, get_unmounted_path
 
 
 @contextlib.contextmanager
@@ -72,80 +71,68 @@ class ExtraDatasetType(str, Enum):
     cluster = "cluster"
 
 
-def _get_dataset_module_from_cluster(cluster_config, mounted_path):
-    # getting tmp path to download init.py
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmp_path = str(Path(tmpdir) / "init.py")
-        cluster_dataset_path = get_unmounted_path(cluster_config, mounted_path)
-        try:
-            cluster_download_file(cluster_config, cluster_dataset_path, tmp_path)
-        except FileNotFoundError:
-            raise RuntimeError(
-                f"Init file {mounted_path} not found on the cluster. "
-                f"Please check the dataset name you're using. Did you forget to run prepare data commands?"
-            )
-        return import_from_path(tmp_path)
-
-
-def get_default_dataset_module(dataset, data_dir=None, cluster_config=None):
-    is_on_cluster = False
-    if data_dir is None:
-        data_path = "/nemo_run/code/nemo_skills/dataset"
-        dataset_module = importlib.import_module(f"nemo_skills.dataset.{dataset}")
-    else:
-        data_path = data_dir
-        if cluster_config is None or cluster_config["executor"] == "none":
-            with add_to_path(data_dir):
-                dataset_module = importlib.import_module(dataset)
-        else:
-            if cluster_config["executor"] == "local":
-                with add_to_path(get_unmounted_path(cluster_config, data_dir)):
-                    dataset_module = importlib.import_module(dataset)
-            else:
-                dataset = dataset.replace(".", "/")
-                dataset_module = _get_dataset_module_from_cluster(cluster_config, f"{data_dir}/{dataset}/__init__.py")
-                is_on_cluster = True
-    return dataset_module, data_path, is_on_cluster
-
-
 def get_dataset_module(dataset, data_dir=None, cluster_config=None, extra_datasets=None, extra_datasets_type=None):
-    """
-    Get dataset module either in default folder or in extra datasets folder.
-
-    If cluster_config is provided, the data_dir will be resolved as a mounted
-    path and appropriately downloaded from the cluster.
+    """Load dataset module from the local filesystem.
 
-    Same will be done for extra_datasets if extra_datasets_type is cluster.
+    For loading datasets from a remote cluster, use
+    nemo_skills.pipeline.dataset.get_dataset_module instead.
 
     Search priority:
-    1. data_dir (or `nemo_skills.dataset` if None) folder
-    3. extra_datasets parameter if defined
-    4. `NEMO_SKILLS_EXTRA_DATASETS` environment variable
+    1. data_dir (or ``nemo_skills.dataset`` if None) folder
+    2. extra_datasets parameter if defined
+    3. ``NEMO_SKILLS_EXTRA_DATASETS`` environment variable
+
+    Args:
+        dataset: Name of the dataset (e.g., "gsm8k", "math").
+        data_dir: Optional custom directory containing dataset module.
+        cluster_config: Deprecated. Use nemo_skills.pipeline.dataset.get_dataset_module for cluster support.
+        extra_datasets: Optional path to extra datasets.
+        extra_datasets_type: Type of extra datasets location ("local" or "cluster").
+
+    Returns:
+        Tuple of (dataset_module, data_path, is_on_cluster).
     """
+    if cluster_config is not None:
+        warnings.warn(
+            "Passing cluster_config to nemo_skills.dataset.utils.get_dataset_module is deprecated. "
+            "Use nemo_skills.pipeline.dataset.get_dataset_module instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        from nemo_skills.pipeline.dataset import get_dataset_module as _pipeline_get_dataset_module
+
+        return _pipeline_get_dataset_module(dataset, data_dir, cluster_config, extra_datasets, extra_datasets_type)
+
+    # Local-only loading
     try:
-        dataset_module, data_path, is_on_cluster = get_default_dataset_module(dataset, data_dir, cluster_config)
+        if data_dir is None:
+            data_path = "/nemo_run/code/nemo_skills/dataset"
+            dataset_module = importlib.import_module(f"nemo_skills.dataset.{dataset}")
+        else:
+            data_path = data_dir
+            with add_to_path(data_dir):
+                dataset_module = importlib.import_module(dataset)
+        return dataset_module, data_path, False
     except ModuleNotFoundError:
-        try:
-            dataset = dataset.replace(".", "/")
-            extra_datasets = extra_datasets or os.environ.get("NEMO_SKILLS_EXTRA_DATASETS")
-            is_on_cluster = False
-            data_path = extra_datasets
-            if extra_datasets is None:
-                raise RuntimeError(f"Dataset {dataset} not found in {data_dir if data_dir else 'nemo_skills.dataset'}")
-            if extra_datasets_type == ExtraDatasetType.local:
-                with add_to_path(extra_datasets):
+        dataset = dataset.replace(".", "/")
+        extra_datasets = extra_datasets or os.environ.get("NEMO_SKILLS_EXTRA_DATASETS")
+        if extra_datasets is None:
+            raise RuntimeError(f"Dataset {dataset} not found in {data_dir if data_dir else 'nemo_skills.dataset'}")
+        if extra_datasets_type == ExtraDatasetType.local or extra_datasets_type is None:
+            with add_to_path(extra_datasets):
+                try:
                     dataset_module = importlib.import_module(dataset)
-            else:
-                dataset_module = _get_dataset_module_from_cluster(
-                    cluster_config, f'{extra_datasets}/{dataset}/"__init__.py"'
-                )
-                is_on_cluster = True
-        except ModuleNotFoundError:
+                except ModuleNotFoundError:
+                    raise RuntimeError(
+                        f"Dataset {dataset} not found in any of the searched locations: "
+                        f"{data_dir if data_dir else 'nemo_skills.dataset'}, {extra_datasets}"
+                    )
+            return dataset_module, extra_datasets, False
+        else:
             raise RuntimeError(
-                f"Dataset {dataset} not found in any of the searched locations: "
-                f"{data_dir if data_dir else 'nemo_skills.dataset'}, {extra_datasets}"
+                f"Cluster-based extra_datasets_type='{extra_datasets_type}' requires cluster_config. "
+                "Use nemo_skills.pipeline.dataset.get_dataset_module instead."
             )
-    return dataset_module, data_path, is_on_cluster
 
 
 def get_lean4_header():
diff --git a/nemo_skills/evaluation/evaluator/__init__.py b/nemo_skills/evaluation/evaluator/__init__.py
index 269bff939c..9c74fb8f82 100644
--- a/nemo_skills/evaluation/evaluator/__init__.py
+++ b/nemo_skills/evaluation/evaluator/__init__.py
@@ -13,106 +13,116 @@
 # limitations under the License.
 
 import asyncio
+import importlib
 from typing import Any, Callable, Dict
 
-from nemo_skills.evaluation.evaluator.audio import AudioEvaluator
 from nemo_skills.evaluation.evaluator.base import BaseEvaluator
-from nemo_skills.evaluation.evaluator.bfcl import eval_bfcl
-from nemo_skills.evaluation.evaluator.bird import BirdEvaluator
-from nemo_skills.evaluation.evaluator.code import (
-    CodeExecEvaluator,
-    eval_bigcodebench,
-    eval_evalplus,
-    eval_human_eval_infilling,
-    eval_livebench_coding,
-    eval_livecodebench_pro,
-)
-from nemo_skills.evaluation.evaluator.compute_eval import ComputeEvalEvaluator
-from nemo_skills.evaluation.evaluator.icpc import ICPCEvaluator
-from nemo_skills.evaluation.evaluator.ifbench import eval_ifbench
-from nemo_skills.evaluation.evaluator.ifeval import eval_if
-from nemo_skills.evaluation.evaluator.ioi import IOIEvaluator
-from nemo_skills.evaluation.evaluator.livecodebench import eval_livecodebench
-from nemo_skills.evaluation.evaluator.math import (
-    Lean4ProofEvaluator,
-    MathEvaluator,
-)
-from nemo_skills.evaluation.evaluator.mcq import eval_mcq
-from nemo_skills.evaluation.evaluator.mmau_pro import eval_mmau_pro
-from nemo_skills.evaluation.evaluator.mrcr import eval_mrcr
-from nemo_skills.evaluation.evaluator.ruler import eval_ruler, eval_ruler2
-from nemo_skills.evaluation.evaluator.scicode import eval_scicode
-
-EVALUATOR_MAP = {
-    # Function-based evaluators (batch-only)
-    "evalplus": eval_evalplus,
-    "if": eval_if,
-    "ifbench": eval_ifbench,
-    "bfcl": eval_bfcl,
-    "multichoice": eval_mcq,
-    "ruler": eval_ruler,
-    "ruler2": eval_ruler2,
-    "livecodebench": eval_livecodebench,
-    "livebench_coding": eval_livebench_coding,
-    "livecodebench_pro": eval_livecodebench_pro,
-    "scicode": eval_scicode,
-    "mrcr": eval_mrcr,
-    "bigcodebench": eval_bigcodebench,
-    "human_eval_infilling": eval_human_eval_infilling,
-    "mmau-pro": eval_mmau_pro,
+
+# Lazy evaluator registry — stores dotted paths instead of eagerly importing
+# every evaluator (which would pull in benchmark-specific deps like func_timeout,
+# faiss, etc.). Actual imports happen on first use.
+
+# Function-based evaluators (batch-only): eval_type -> "module_path:function_name"
+_EVALUATOR_MAP_PATHS = {
+    "evalplus": "nemo_skills.evaluation.evaluator.code:eval_evalplus",
+    "if": "nemo_skills.evaluation.evaluator.ifeval:eval_if",
+    "ifbench": "nemo_skills.evaluation.evaluator.ifbench:eval_ifbench",
+    "bfcl": "nemo_skills.evaluation.evaluator.bfcl:eval_bfcl",
+    "multichoice": "nemo_skills.evaluation.evaluator.mcq:eval_mcq",
+    "ruler": "nemo_skills.evaluation.evaluator.ruler:eval_ruler",
+    "ruler2": "nemo_skills.evaluation.evaluator.ruler:eval_ruler2",
+    "livecodebench": "nemo_skills.evaluation.evaluator.livecodebench:eval_livecodebench",
+    "livebench_coding": "nemo_skills.evaluation.evaluator.code:eval_livebench_coding",
+    "livecodebench_pro": "nemo_skills.evaluation.evaluator.code:eval_livecodebench_pro",
+    "scicode": "nemo_skills.evaluation.evaluator.scicode:eval_scicode",
+    "mrcr": "nemo_skills.evaluation.evaluator.mrcr:eval_mrcr",
+    "bigcodebench": "nemo_skills.evaluation.evaluator.code:eval_bigcodebench",
+    "human_eval_infilling": "nemo_skills.evaluation.evaluator.code:eval_human_eval_infilling",
+    "mmau-pro": "nemo_skills.evaluation.evaluator.mmau_pro:eval_mmau_pro",
 }
 
-# Evaluator class mapping, other evaluators can be added here as they're converted to classes
-EVALUATOR_CLASS_MAP = {
-    "math": MathEvaluator,
-    "lean4-proof": Lean4ProofEvaluator,
-    "code_exec": CodeExecEvaluator,
-    "ioi": IOIEvaluator,
-    "icpc": ICPCEvaluator,
-    "audio": AudioEvaluator,
-    "bird": BirdEvaluator,
-    "compute-eval": ComputeEvalEvaluator,
+# Class-based evaluators: eval_type -> "module_path:ClassName"
+_EVALUATOR_CLASS_MAP_PATHS = {
+    "math": "nemo_skills.evaluation.evaluator.math:MathEvaluator",
+    "lean4-proof": "nemo_skills.evaluation.evaluator.math:Lean4ProofEvaluator",
+    "code_exec": "nemo_skills.evaluation.evaluator.code:CodeExecEvaluator",
+    "ioi": "nemo_skills.evaluation.evaluator.ioi:IOIEvaluator",
+    "icpc": "nemo_skills.evaluation.evaluator.icpc:ICPCEvaluator",
+    "audio": "nemo_skills.evaluation.evaluator.audio:AudioEvaluator",
+    "bird": "nemo_skills.evaluation.evaluator.bird:BirdEvaluator",
+    "compute-eval": "nemo_skills.evaluation.evaluator.compute_eval:ComputeEvalEvaluator",
 }
 
 # Validation: Ensure no overlap between class and function maps
-_class_types = set(EVALUATOR_CLASS_MAP.keys())
-_function_types = set(EVALUATOR_MAP.keys())
-_overlap = _class_types.intersection(_function_types)
+_overlap = set(_EVALUATOR_CLASS_MAP_PATHS.keys()).intersection(_EVALUATOR_MAP_PATHS.keys())
 if _overlap:
     raise ValueError(
         f"Evaluator types cannot be in both EVALUATOR_CLASS_MAP and EVALUATOR_MAP: {_overlap}. "
         f"Each eval_type must be in exactly one map."
     )
 
+# Caches for resolved imports
+_resolved_evaluator_map: Dict[str, Callable] = {}
+_resolved_class_map: Dict[str, type] = {}
+
+
+def _resolve(dotted: str):
+    """Import 'module.path:AttributeName' and return the attribute."""
+    module_path, attr_name = dotted.rsplit(":", 1)
+    module = importlib.import_module(module_path)
+    return getattr(module, attr_name)
+
+
+def _get_evaluator_fn(eval_type: str) -> Callable:
+    if eval_type not in _resolved_evaluator_map:
+        _resolved_evaluator_map[eval_type] = _resolve(_EVALUATOR_MAP_PATHS[eval_type])
+    return _resolved_evaluator_map[eval_type]
+
+
+def _get_evaluator_cls(eval_type: str) -> type:
+    if eval_type not in _resolved_class_map:
+        _resolved_class_map[eval_type] = _resolve(_EVALUATOR_CLASS_MAP_PATHS[eval_type])
+    return _resolved_class_map[eval_type]
+
+
+# --- Public API (unchanged signatures) ---
+
+# Keep EVALUATOR_MAP and EVALUATOR_CLASS_MAP as lazy-resolving dicts for
+# any code that iterates them (e.g. listing available types).
+# Direct key access goes through the public functions below.
+EVALUATOR_MAP = _EVALUATOR_MAP_PATHS
+EVALUATOR_CLASS_MAP = _EVALUATOR_CLASS_MAP_PATHS
+
 
 def is_evaluator_registered(eval_type: str):
     """Check if evaluator is registered in either class or function map."""
-    return eval_type in EVALUATOR_CLASS_MAP or eval_type in EVALUATOR_MAP
+    return eval_type in _EVALUATOR_CLASS_MAP_PATHS or eval_type in _EVALUATOR_MAP_PATHS
 
 
 def register_evaluator(eval_type: str, eval_fn: Callable[[Dict[str, Any]], None]):
     if is_evaluator_registered(eval_type):
         raise ValueError(f"Evaluator for {eval_type} already registered")
 
-    EVALUATOR_MAP[eval_type] = eval_fn
+    _EVALUATOR_MAP_PATHS[eval_type] = None
+    _resolved_evaluator_map[eval_type] = eval_fn
 
 
 def get_evaluator_class(eval_type: str, config: Dict[str, Any]) -> BaseEvaluator:
     """Get evaluator instance by type."""
-    if eval_type not in EVALUATOR_CLASS_MAP:
+    if eval_type not in _EVALUATOR_CLASS_MAP_PATHS:
         raise ValueError(
             f"Evaluator class not found for type: {eval_type}.\n"
-            f"Available types with class support: {list(EVALUATOR_CLASS_MAP.keys())}\n"
-            f"All supported types: {list(EVALUATOR_MAP.keys())}"
+            f"Available types with class support: {list(_EVALUATOR_CLASS_MAP_PATHS.keys())}\n"
+            f"All supported types: {list(_EVALUATOR_MAP_PATHS.keys())}"
         )
 
-    evaluator_class = EVALUATOR_CLASS_MAP[eval_type]
+    evaluator_class = _get_evaluator_cls(eval_type)
     return evaluator_class(config)
 
 
 def supports_single_eval(eval_type: str, config: Dict[str, Any]) -> bool:
     """Check if evaluator supports single data point evaluation during generation."""
-    if eval_type not in EVALUATOR_CLASS_MAP:
+    if eval_type not in _EVALUATOR_CLASS_MAP_PATHS:
         return False  # Only class-based evaluators support single eval
 
     evaluator = get_evaluator_class(eval_type, config)
@@ -122,15 +132,15 @@ def supports_single_eval(eval_type: str, config: Dict[str, Any]) -> bool:
 def evaluate(eval_type, eval_config):
     """Main evaluation function that handles both class-based and function-based evaluators."""
     # Check if it's a class-based evaluator first
-    if eval_type in EVALUATOR_CLASS_MAP:
+    if eval_type in _EVALUATOR_CLASS_MAP_PATHS:
         evaluator = get_evaluator_class(eval_type, eval_config)
         print(f"evaluator: {evaluator}")
         return asyncio.run(evaluator.eval_full())
 
     # Fall back to function-based evaluator
-    if eval_type in EVALUATOR_MAP:
-        return EVALUATOR_MAP[eval_type](eval_config)
+    if eval_type in _EVALUATOR_MAP_PATHS:
+        return _get_evaluator_fn(eval_type)(eval_config)
 
     # Not found in either map
-    all_types = list(EVALUATOR_CLASS_MAP.keys()) + list(EVALUATOR_MAP.keys())
+    all_types = list(_EVALUATOR_CLASS_MAP_PATHS.keys()) + list(_EVALUATOR_MAP_PATHS.keys())
     raise ValueError(f"Evaluator not found for type: {eval_type}.\nSupported types: {sorted(all_types)}")
diff --git a/nemo_skills/pipeline/__init__.py b/nemo_skills/pipeline/__init__.py
index d9155f923f..a0960f5a2b 100644
--- a/nemo_skills/pipeline/__init__.py
+++ b/nemo_skills/pipeline/__init__.py
@@ -11,3 +11,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+from importlib.metadata import PackageNotFoundError, metadata
+
+try:
+    metadata("nemo-skills")
+except PackageNotFoundError:
+    raise ImportError(
+        "nemo-skills-core is installed but pipeline functionality requires the full package. "
+        "Install it with: pip install nemo-skills"
+    ) from None
diff --git a/nemo_skills/pipeline/dataset.py b/nemo_skills/pipeline/dataset.py
new file mode 100644
index 0000000000..a238cbb4b7
--- /dev/null
+++ b/nemo_skills/pipeline/dataset.py
@@ -0,0 +1,114 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Cluster-aware dataset loading.
+
+This module extends nemo_skills.dataset.utils with cluster support
+(SSH tunnels, mount path resolution, remote downloads).
+
+For local-only dataset loading, use nemo_skills.dataset.utils directly.
+"""
+
+import importlib
+import os
+import tempfile
+from pathlib import Path
+
+from nemo_skills.dataset.utils import (
+    ExtraDatasetType,
+    add_to_path,
+    import_from_path,
+)
+from nemo_skills.dataset.utils import (
+    get_dataset_module as _get_local_dataset_module,
+)
+from nemo_skills.pipeline.utils import cluster_download_file, get_unmounted_path
+
+
+def _get_dataset_module_from_cluster(cluster_config, mounted_path):
+    """Download and import a dataset module from a remote cluster."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmp_path = str(Path(tmpdir) / "init.py")
+        cluster_dataset_path = get_unmounted_path(cluster_config, mounted_path)
+        try:
+            cluster_download_file(cluster_config, cluster_dataset_path, tmp_path)
+        except FileNotFoundError:
+            raise RuntimeError(
+                f"Init file {mounted_path} not found on the cluster. "
+                f"Please check the dataset name you're using. Did you forget to run prepare data commands?"
+            )
+        return import_from_path(tmp_path)
+
+
+def _get_default_dataset_module(dataset, data_dir=None, cluster_config=None):
+    """Load dataset module with cluster support.
+
+    For local-only loading (no cluster_config), delegates to
+    nemo_skills.dataset.utils.get_dataset_module.
+    """
+    if cluster_config is None or cluster_config.get("executor") in (None, "none"):
+        # Delegate to core for local-only loading
+        return _get_local_dataset_module(dataset, data_dir)
+
+    is_on_cluster = False
+    if data_dir is None:
+        data_path = "/nemo_run/code/nemo_skills/dataset"
+        dataset_module = importlib.import_module(f"nemo_skills.dataset.{dataset}")
+    else:
+        data_path = data_dir
+        if cluster_config["executor"] == "local":
+            with add_to_path(get_unmounted_path(cluster_config, data_dir)):
+                dataset_module = importlib.import_module(dataset)
+        else:
+            dataset = dataset.replace(".", "/")
+            dataset_module = _get_dataset_module_from_cluster(cluster_config, f"{data_dir}/{dataset}/__init__.py")
+            is_on_cluster = True
+    return dataset_module, data_path, is_on_cluster
+
+
+def get_dataset_module(dataset, data_dir=None, cluster_config=None, extra_datasets=None, extra_datasets_type=None):
+    """Load dataset module with optional cluster support.
+
+    This is the cluster-aware version of nemo_skills.dataset.utils.get_dataset_module.
+    If cluster_config is None or executor is "none", delegates to the core version.
+
+    Search priority:
+    1. data_dir (or `nemo_skills.dataset` if None) folder
+    2. extra_datasets parameter if defined
+    3. `NEMO_SKILLS_EXTRA_DATASETS` environment variable
+    """
+    try:
+        dataset_module, data_path, is_on_cluster = _get_default_dataset_module(dataset, data_dir, cluster_config)
+    except ModuleNotFoundError:
+        try:
+            dataset = dataset.replace(".", "/")
+            extra_datasets = extra_datasets or os.environ.get("NEMO_SKILLS_EXTRA_DATASETS")
+            is_on_cluster = False
+            data_path = extra_datasets
+            if extra_datasets is None:
+                raise RuntimeError(f"Dataset {dataset} not found in {data_dir if data_dir else 'nemo_skills.dataset'}")
+            if extra_datasets_type == ExtraDatasetType.local or extra_datasets_type is None:
+                with add_to_path(extra_datasets):
+                    dataset_module = importlib.import_module(dataset)
+            else:
+                dataset_module = _get_dataset_module_from_cluster(
+                    cluster_config, f"{extra_datasets}/{dataset}/__init__.py"
+                )
+                is_on_cluster = True
+        except ModuleNotFoundError:
+            raise RuntimeError(
+                f"Dataset {dataset} not found in any of the searched locations: "
+                f"{data_dir if data_dir else 'nemo_skills.dataset'}, {extra_datasets}"
+            )
+    return dataset_module, data_path, is_on_cluster
diff --git a/pyproject.toml b/pyproject.toml
index d86f184bc6..515ba8be7f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -55,7 +55,14 @@ exclude = ["tests", "tests.*"]
 [tool.setuptools.dynamic]
 version = { attr = "nemo_skills.version.__version__" }
 dependencies = {file = ["requirements/main.txt"]}
-optional-dependencies = {dev = { file = ["requirements/common-tests.txt", "requirements/common-dev.txt"] } }
+
+[tool.setuptools.dynamic.optional-dependencies]
+# Core: lightweight runtime only (inference, evaluation, tool calling)
+core = { file = ["core/requirements.txt"] }
+# Pipeline: orchestration, cluster management, CLI (adds nemo_run, typer, wandb)
+pipeline = { file = ["requirements/pipeline.txt"] }
+# Dev: development and testing tools
+dev = { file = ["requirements/common-tests.txt", "requirements/common-dev.txt"] }
 
 [tool.pytest.ini_options]
 markers = [
diff --git a/requirements/pipeline.txt b/requirements/pipeline.txt
new file mode 100644
index 0000000000..eb839a91da
--- /dev/null
+++ b/requirements/pipeline.txt
@@ -0,0 +1,8 @@
+# Pipeline/orchestration dependencies (CLI, cluster management, experiment tracking).
+# These are additional to core.txt.
+
+click < 8.2.0  # https://github.com/ai-dynamo/dynamo/issues/1039
+nemo-evaluator-launcher<0.1.47
+nemo_run @ git+https://github.com/NVIDIA-NeMo/Run
+typer >= 0.13
+wandb

From 0a4f056da8b28560b697772d2bbc27d4b7571c3e Mon Sep 17 00:00:00 2001
From: George Armstrong <georgea@nvidia.com>
Date: Fri, 13 Feb 2026 13:19:41 -0800
Subject: [PATCH 02/25] Suggestions from code review addressed

Signed-off-by: George Armstrong <georgea@nvidia.com>
---
 nemo_skills/evaluation/evaluator/__init__.py | 6 +++---
 nemo_skills/pipeline/dataset.py              | 2 +-
 requirements/main.txt                        | 1 +
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/nemo_skills/evaluation/evaluator/__init__.py b/nemo_skills/evaluation/evaluator/__init__.py
index 9c74fb8f82..28861484d0 100644
--- a/nemo_skills/evaluation/evaluator/__init__.py
+++ b/nemo_skills/evaluation/evaluator/__init__.py
@@ -103,17 +103,18 @@ def register_evaluator(eval_type: str, eval_fn: Callable[[Dict[str, Any]], None]
     if is_evaluator_registered(eval_type):
         raise ValueError(f"Evaluator for {eval_type} already registered")
 
-    _EVALUATOR_MAP_PATHS[eval_type] = None
+    _EVALUATOR_MAP_PATHS[eval_type] = "<dynamically-registered>"
     _resolved_evaluator_map[eval_type] = eval_fn
 
 
 def get_evaluator_class(eval_type: str, config: Dict[str, Any]) -> BaseEvaluator:
     """Get evaluator instance by type."""
     if eval_type not in _EVALUATOR_CLASS_MAP_PATHS:
+        all_types = sorted(list(_EVALUATOR_CLASS_MAP_PATHS.keys()) + list(_EVALUATOR_MAP_PATHS.keys()))
         raise ValueError(
             f"Evaluator class not found for type: {eval_type}.\n"
             f"Available types with class support: {list(_EVALUATOR_CLASS_MAP_PATHS.keys())}\n"
-            f"All supported types: {list(_EVALUATOR_MAP_PATHS.keys())}"
+            f"All supported types: {all_types}"
         )
 
     evaluator_class = _get_evaluator_cls(eval_type)
@@ -134,7 +135,6 @@ def evaluate(eval_type, eval_config):
     # Check if it's a class-based evaluator first
     if eval_type in _EVALUATOR_CLASS_MAP_PATHS:
         evaluator = get_evaluator_class(eval_type, eval_config)
-        print(f"evaluator: {evaluator}")
         return asyncio.run(evaluator.eval_full())
 
     # Fall back to function-based evaluator
diff --git a/nemo_skills/pipeline/dataset.py b/nemo_skills/pipeline/dataset.py
index a238cbb4b7..4a5b96185d 100644
--- a/nemo_skills/pipeline/dataset.py
+++ b/nemo_skills/pipeline/dataset.py
@@ -57,7 +57,7 @@ def _get_default_dataset_module(dataset, data_dir=None, cluster_config=None):
     For local-only loading (no cluster_config), delegates to
     nemo_skills.dataset.utils.get_dataset_module.
     """
-    if cluster_config is None or cluster_config.get("executor") in (None, "none"):
+    if cluster_config is None or cluster_config["executor"] in (None, "none"):
         # Delegate to core for local-only loading
         return _get_local_dataset_module(dataset, data_dir)
 
diff --git a/requirements/main.txt b/requirements/main.txt
index 67ecf92c1f..12d56df3ab 100644
--- a/requirements/main.txt
+++ b/requirements/main.txt
@@ -43,6 +43,7 @@ openai
 pyyaml
 rank_bm25 # Needed for BFCLv4
 requests
+rich
 sacrebleu
 scikit-learn
 sentence_transformers # Needed for BFCLv4 

From e2361e62cf06ca4a087e0a237cd608d59ab87d24 Mon Sep 17 00:00:00 2001
From: George Armstrong <georgea@nvidia.com>
Date: Fri, 13 Feb 2026 13:50:47 -0800
Subject: [PATCH 03/25] Revert sentinel string back to None in
 register_evaluator

Signed-off-by: George Armstrong <georgea@nvidia.com>
---
 nemo_skills/evaluation/evaluator/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nemo_skills/evaluation/evaluator/__init__.py b/nemo_skills/evaluation/evaluator/__init__.py
index 28861484d0..f4617bf45e 100644
--- a/nemo_skills/evaluation/evaluator/__init__.py
+++ b/nemo_skills/evaluation/evaluator/__init__.py
@@ -103,7 +103,7 @@ def register_evaluator(eval_type: str, eval_fn: Callable[[Dict[str, Any]], None]
     if is_evaluator_registered(eval_type):
         raise ValueError(f"Evaluator for {eval_type} already registered")
 
-    _EVALUATOR_MAP_PATHS[eval_type] = "<dynamically-registered>"
+    _EVALUATOR_MAP_PATHS[eval_type] = None
     _resolved_evaluator_map[eval_type] = eval_fn
 
 

From cc705010f2f9aa7e243e6696680110fd5d4bc656 Mon Sep 17 00:00:00 2001
From: George Armstrong <georgea@nvidia.com>
Date: Fri, 13 Feb 2026 13:58:39 -0800
Subject: [PATCH 04/25] Fix extra_datasets silently ignored when cluster_config
 is None

Signed-off-by: George Armstrong <georgea@nvidia.com>
---
 nemo_skills/pipeline/dataset.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/nemo_skills/pipeline/dataset.py b/nemo_skills/pipeline/dataset.py
index 4a5b96185d..97867b1262 100644
--- a/nemo_skills/pipeline/dataset.py
+++ b/nemo_skills/pipeline/dataset.py
@@ -51,7 +51,9 @@ def _get_dataset_module_from_cluster(cluster_config, mounted_path):
         return import_from_path(tmp_path)
 
 
-def _get_default_dataset_module(dataset, data_dir=None, cluster_config=None):
+def _get_default_dataset_module(
+    dataset, data_dir=None, cluster_config=None, extra_datasets=None, extra_datasets_type=None
+):
     """Load dataset module with cluster support.
 
     For local-only loading (no cluster_config), delegates to
@@ -59,7 +61,9 @@ def _get_default_dataset_module(dataset, data_dir=None, cluster_config=None):
     """
     if cluster_config is None or cluster_config["executor"] in (None, "none"):
         # Delegate to core for local-only loading
-        return _get_local_dataset_module(dataset, data_dir)
+        return _get_local_dataset_module(
+            dataset, data_dir, extra_datasets=extra_datasets, extra_datasets_type=extra_datasets_type
+        )
 
     is_on_cluster = False
     if data_dir is None:
@@ -89,7 +93,9 @@ def get_dataset_module(dataset, data_dir=None, cluster_config=None, extra_datase
     3. `NEMO_SKILLS_EXTRA_DATASETS` environment variable
     """
     try:
-        dataset_module, data_path, is_on_cluster = _get_default_dataset_module(dataset, data_dir, cluster_config)
+        dataset_module, data_path, is_on_cluster = _get_default_dataset_module(
+            dataset, data_dir, cluster_config, extra_datasets, extra_datasets_type
+        )
     except ModuleNotFoundError:
         try:
             dataset = dataset.replace(".", "/")

From a0a2aa7398a8c0c9585f01c10bc83de63f567d05 Mon Sep 17 00:00:00 2001
From: George Armstrong <georgea@nvidia.com>
Date: Fri, 13 Feb 2026 22:35:03 -0800
Subject: [PATCH 05/25] Add all benchmark evaluator deps to
 core/requirements.txt

Per review feedback: all benchmark-specific packages should go to core
for now since JIT install is not yet implemented. Previously only
PythonTool-specific deps were in core while benchmark deps like datasets,
sacrebleu, faiss-cpu, etc. were only in main.txt. This led to an
inconsistent boundary where math grader deps were in core but BFCL deps
were not, despite both being benchmark-specific.

Addresses review comments #1, #4, #6 on PR #1229.

Signed-off-by: George Armstrong <georgea@nvidia.com>
---
 core/requirements.txt | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/core/requirements.txt b/core/requirements.txt
index 696507a4bf..170ae0acca 100644
--- a/core/requirements.txt
+++ b/core/requirements.txt
@@ -1,17 +1,30 @@
-# Core dependencies for agent runtime (inference, evaluation, tool calling, math/code grading).
+# Core dependencies for inference, evaluation, tool calling, and all benchmark evaluators.
 # No cluster orchestration deps (nemo_run, typer, etc.)
+# NOTE: benchmark-specific deps are included here because JIT install is not yet implemented.
+# Once JIT install is ready, benchmark deps can be moved to per-benchmark extras.
 
 
+# --- benchmark evaluator deps ---
+bs4
+
 # --- code evaluation ---
 compute-eval @ git+https://github.com/NVIDIA/compute-eval.git@2d14770
+datasets
+editdistance
 evalplus @ git+https://github.com/evalplus/evalplus@c91370f
+faiss-cpu
 fire
 flask
+func-timeout
+gradio
 # --- agent runtime ---
 httpx
 huggingface_hub
 hydra-core
 ipython
+iso639-lang
+langcodes
+language-data
 litellm[caching]
 
 # --- math evaluation ---
@@ -20,8 +33,13 @@ mcp
 numpy
 openai
 pyyaml
+rank_bm25
 requests
 rich
+sacrebleu
+scikit-learn
+sentence_transformers
+serpapi
 sympy
 tqdm
 transformers

From 7cf5fb6da44ef5250116c161ecfa319589072f6f Mon Sep 17 00:00:00 2001
From: George Armstrong <georgea@nvidia.com>
Date: Fri, 13 Feb 2026 22:35:23 -0800
Subject: [PATCH 06/25] Eliminate requirements/main.txt duplication

pyproject.toml now composes default dependencies from
core/requirements.txt + requirements/pipeline.txt instead of
maintaining a separate monolithic main.txt that duplicated both.

This ensures a single source of truth for each dependency: it lives
in exactly one requirements file, and pyproject.toml references both.

Addresses review comment #5 on PR #1229.

Signed-off-by: George Armstrong <georgea@nvidia.com>
---
 pyproject.toml        |  2 +-
 requirements/main.txt | 55 -------------------------------------------
 2 files changed, 1 insertion(+), 56 deletions(-)
 delete mode 100644 requirements/main.txt

diff --git a/pyproject.toml b/pyproject.toml
index 515ba8be7f..2e5a5afb14 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -54,7 +54,7 @@ exclude = ["tests", "tests.*"]
 
 [tool.setuptools.dynamic]
 version = { attr = "nemo_skills.version.__version__" }
-dependencies = {file = ["requirements/main.txt"]}
+dependencies = {file = ["core/requirements.txt", "requirements/pipeline.txt"]}
 
 [tool.setuptools.dynamic.optional-dependencies]
 # Core: lightweight runtime only (inference, evaluation, tool calling)
diff --git a/requirements/main.txt b/requirements/main.txt
deleted file mode 100644
index 12d56df3ab..0000000000
--- a/requirements/main.txt
+++ /dev/null
@@ -1,55 +0,0 @@
-# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-bs4
-click < 8.2.0  # https://github.com/ai-dynamo/dynamo/issues/1039
-compute-eval @ git+https://github.com/NVIDIA/compute-eval.git@2d14770
-datasets
-editdistance
-# ddgs # Needed for BFCLv4 - currently cannot be installed directly due to lepton obsolete httpx version in use
-evalplus @ git+https://github.com/evalplus/evalplus@c91370f
-faiss-cpu # Needed for BFCLv4
-fire
-
-# needed local code execution server for persistent sessions
-flask
-func-timeout # Needed for BIRD benchmark
-gradio
-httpx
-huggingface_hub
-hydra-core
-ipython
-iso639-lang
-langcodes
-language-data
-litellm[caching]
-math-verify[antlr4_9_3]
-mcp
-nemo-evaluator-launcher<0.1.47
-nemo_run @ git+https://github.com/NVIDIA-NeMo/Run
-numpy
-openai
-pyyaml
-rank_bm25 # Needed for BFCLv4
-requests
-rich
-sacrebleu
-scikit-learn
-sentence_transformers # Needed for BFCLv4 
-serpapi # Needed for BFCLv4
-sympy
-tqdm
-transformers
-typer >= 0.13
-wandb

From 9e84d391244425530bd5d21a1ce45e03f5567c2d Mon Sep 17 00:00:00 2001
From: George Armstrong <georgea@nvidia.com>
Date: Fri, 13 Feb 2026 22:36:02 -0800
Subject: [PATCH 07/25] Add tests/test_dependency_isolation.py

Creates the test file referenced in docs/basics/installation.md that
verifies the core/pipeline dependency boundary. Tests import each
core module in a subprocess where nemo_run and nemo_skills.pipeline
are blocked, ensuring core has no top-level pipeline dependencies.

Addresses review comment #2 on PR #1229.

Signed-off-by: George Armstrong <georgea@nvidia.com>
---
 tests/test_dependency_isolation.py | 79 ++++++++++++++++++++++++++++++
 1 file changed, 79 insertions(+)
 create mode 100644 tests/test_dependency_isolation.py

diff --git a/tests/test_dependency_isolation.py b/tests/test_dependency_isolation.py
new file mode 100644
index 0000000000..928f28b1ee
--- /dev/null
+++ b/tests/test_dependency_isolation.py
@@ -0,0 +1,79 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests that core modules can be imported without pipeline dependencies.
+
+The core/pipeline boundary rule is: Pipeline can import Core, but Core cannot
+import Pipeline. Concretely, this means that core modules must not have
+top-level imports of nemo_run or anything under nemo_skills.pipeline.
+
+These tests verify this by importing core modules in a subprocess where
+nemo_run is blocked via a sys.modules override.
+"""
+
+import subprocess
+import sys
+
+import pytest
+
+# Core modules that must be importable without nemo_run / pipeline
+CORE_MODULES = [
+    "nemo_skills.inference.generate",
+    "nemo_skills.inference.model",
+    "nemo_skills.evaluation.evaluator",
+    "nemo_skills.evaluation.math_grader",
+    "nemo_skills.dataset.utils",
+    "nemo_skills.mcp.tool_manager",
+    "nemo_skills.code_execution.sandbox",
+    "nemo_skills.utils",
+]
+
+
+@pytest.mark.parametrize("module_name", CORE_MODULES)
+def test_core_module_imports_without_nemo_run(module_name):
+    """Each core module must import successfully when nemo_run is unavailable."""
+    script = (
+        "import sys; "
+        # Block nemo_run from being importable
+        "sys.modules['nemo_run'] = None; "
+        "sys.modules['nemo_skills.pipeline'] = None; "
+        "sys.modules['nemo_skills.pipeline.utils'] = None; "
+        f"import {module_name}; "
+        "print('OK')"
+    )
+    result = subprocess.run(
+        [sys.executable, "-c", script],
+        capture_output=True,
+        text=True,
+        timeout=30,
+    )
+    assert result.returncode == 0, (
+        f"Importing {module_name} failed when nemo_run is unavailable.\n"
+        f"stdout: {result.stdout}\n"
+        f"stderr: {result.stderr}"
+    )
+
+
+def test_pipeline_can_import_core():
+    """Pipeline modules should be able to import core modules."""
+    script = "from nemo_skills.pipeline.dataset import get_dataset_module; print('OK')"
+    result = subprocess.run(
+        [sys.executable, "-c", script],
+        capture_output=True,
+        text=True,
+        timeout=30,
+    )
+    assert result.returncode == 0, (
+        f"Pipeline failed to import core modules.\nstdout: {result.stdout}\nstderr: {result.stderr}"
+    )

From ae191dadc24a89ed091080caeb76e97100d55072 Mon Sep 17 00:00:00 2001
From: George Armstrong <georgea@nvidia.com>
Date: Fri, 13 Feb 2026 22:36:47 -0800
Subject: [PATCH 08/25] Revise CONTRIBUTING.md dependency boundary guidance

Rewrite the dependency boundary section to:
- Define core as "everything needed for inference + evaluation" (not
  just PythonTool-specific deps)
- Remove references to deleted requirements/main.txt
- Clarify that all benchmark evaluator deps go to core until JIT
  install is implemented
- Improve dataset module separation guidance (pipeline = cluster I/O
  only, core = all local logic)
- Add note about summarize-results refactor (issue #779)

Addresses review comments #3, #4, #6, #7 on PR #1229.

Signed-off-by: George Armstrong <georgea@nvidia.com>
---
 CONTRIBUTING.md | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index f35b6cc45a..eeb3eb45e3 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -64,34 +64,40 @@ Core modules are everything under `nemo_skills/` **except** `nemo_skills/pipelin
 
 | If the dependency is needed for... | Add it to |
 |---|---|
-| Inference, evaluation, math/code grading, MCP clients, prompt formatting | `core/requirements.txt` |
-| CLI commands, cluster orchestration, experiment tracking | `requirements/pipeline.txt` |
-| Everything else (dataset-specific deps, benchmark-specific packages) | `requirements/main.txt` only |
+| Inference, evaluation, tool calling, any benchmark evaluator | `core/requirements.txt` |
+| CLI commands (`ns`), cluster orchestration, experiment tracking | `requirements/pipeline.txt` |
 
-Dependencies in `core/requirements.txt` should be things that a typical `GenerationTask` run with PythonTool would need. Dataset-specific or benchmark-specific packages (e.g., `faiss-cpu`, `sacrebleu`, `func-timeout`) go only in `requirements/main.txt`.
+There is no separate `main.txt` — `pyproject.toml` composes the default install from `core/requirements.txt` + `requirements/pipeline.txt`. Each dependency lives in exactly one file.
 
-All core and pipeline deps must also appear in `requirements/main.txt` (the monolithic file used for default installs).
+**Boundary definition:**
+
+- **Core** = everything needed to run inference + evaluation locally (including all benchmark evaluator deps)
+- **Pipeline** = orchestration-only deps (`nemo_run`, `typer`, `wandb`, `click`, `nemo-evaluator-launcher`)
+
+All benchmark-specific dependencies (e.g., `faiss-cpu`, `sacrebleu`, `datasets`, `func-timeout`) go in `core/requirements.txt`. Eventually these should migrate to JIT (just-in-time) install so that benchmark deps are installed on demand at runtime, but until that is implemented, they must be in core so evaluators do not crash at runtime.
 
 **Examples of correct placement:**
 
 - `httpx` -> `core/requirements.txt` (used by model inference clients)
 - `sympy` -> `core/requirements.txt` (used by math graders)
+- `sacrebleu` -> `core/requirements.txt` (used by translation benchmark evaluator)
+- `faiss-cpu` -> `core/requirements.txt` (used by BFCL benchmark evaluator)
 - `nemo_run` -> `requirements/pipeline.txt` (cluster job orchestration)
 - `wandb` -> `requirements/pipeline.txt` (experiment tracking for cluster jobs)
-- `faiss-cpu` -> `requirements/main.txt` only (only needed for BFCL benchmark)
 
 **Examples of mistakes to avoid:**
 
 - Adding `nemo_run` to `core/requirements.txt` -- it is a pipeline/orchestration dependency, core must not depend on it.
 - Adding `typer` to `core/requirements.txt` -- it is the CLI framework, only used by the pipeline layer.
-- Adding a new dependency only to a subpackage file but forgetting `requirements/main.txt` -- the default install would be missing it.
 
 **When writing new core code:**
 
 - If you need something from `nemo_skills.pipeline`, your code probably belongs in pipeline, not core. Move it.
-- If you have a function that works locally but *also* needs a cluster variant, put the local version in core and a cluster-aware wrapper in `nemo_skills/pipeline/` (see `pipeline/dataset.py` for the pattern).
+- If you have a function that works locally but *also* needs a cluster variant, put the local version in core and a cluster-aware wrapper in `nemo_skills/pipeline/` (see `pipeline/dataset.py` for the pattern). The pipeline wrapper should **only** handle cluster I/O (SSH downloads, mount resolution), then delegate to core for all local logic.
 - If you absolutely must reference pipeline code from core for backwards compatibility, use a lazy import inside a function body with a `DeprecationWarning` (see `dataset/utils.py:get_dataset_module` for the pattern). Never add a top-level import.
 
+> **Note:** `summarize-results` currently straddles core and pipeline (it downloads files from the cluster and processes them locally). A clean separation is tracked in [issue #779](https://github.com/NVIDIA-NeMo/Skills/issues/779#issuecomment-3344395623).
+
 ### Keep the code elegant
 When adding new features, try to keep the code simple and elegant.
 - Can you reuse / extend an existing functionality?

From 8118b5ade34cabadde7dd55c9c7c6566a3dd649e Mon Sep 17 00:00:00 2001
From: George Armstrong <georgea@nvidia.com>
Date: Fri, 13 Feb 2026 22:37:42 -0800
Subject: [PATCH 09/25] Simplify pipeline/dataset.py to eliminate local logic
 duplication

Refactor pipeline/dataset.py so it ONLY handles cluster I/O (SSH
downloads, mount path resolution) and delegates all local
import/resolution logic to core's dataset/utils.py.

Key changes:
- Extract cluster-specific loading into _get_cluster_dataset_module()
- For local extra_datasets fallback, delegate to core instead of
  reimplementing add_to_path + import_module
- For non-cluster cases, delegate entirely to core from the start
- Remove duplicated local import logic that was parallel to core

Addresses review comment #7 on PR #1229.

Signed-off-by: George Armstrong <georgea@nvidia.com>
---
 nemo_skills/pipeline/dataset.py | 102 ++++++++++++++++----------------
 1 file changed, 50 insertions(+), 52 deletions(-)

diff --git a/nemo_skills/pipeline/dataset.py b/nemo_skills/pipeline/dataset.py
index 97867b1262..0702136b57 100644
--- a/nemo_skills/pipeline/dataset.py
+++ b/nemo_skills/pipeline/dataset.py
@@ -14,8 +14,9 @@
 
 """Cluster-aware dataset loading.
 
-This module extends nemo_skills.dataset.utils with cluster support
-(SSH tunnels, mount path resolution, remote downloads).
+This module adds cluster support (SSH downloads, mount path resolution)
+on top of nemo_skills.dataset.utils. The pipeline layer ONLY handles
+cluster I/O — all local import/resolution logic lives in core.
 
 For local-only dataset loading, use nemo_skills.dataset.utils directly.
 """
@@ -51,70 +52,67 @@ def _get_dataset_module_from_cluster(cluster_config, mounted_path):
         return import_from_path(tmp_path)
 
 
-def _get_default_dataset_module(
-    dataset, data_dir=None, cluster_config=None, extra_datasets=None, extra_datasets_type=None
-):
-    """Load dataset module with cluster support.
+def _get_cluster_dataset_module(dataset, data_dir, cluster_config):
+    """Load dataset module when running on a cluster.
 
-    For local-only loading (no cluster_config), delegates to
-    nemo_skills.dataset.utils.get_dataset_module.
+    Handles cluster-specific cases: local executor with mount paths,
+    and remote executor with SSH downloads.
     """
-    if cluster_config is None or cluster_config["executor"] in (None, "none"):
-        # Delegate to core for local-only loading
-        return _get_local_dataset_module(
-            dataset, data_dir, extra_datasets=extra_datasets, extra_datasets_type=extra_datasets_type
-        )
-
-    is_on_cluster = False
     if data_dir is None:
         data_path = "/nemo_run/code/nemo_skills/dataset"
         dataset_module = importlib.import_module(f"nemo_skills.dataset.{dataset}")
-    else:
-        data_path = data_dir
-        if cluster_config["executor"] == "local":
-            with add_to_path(get_unmounted_path(cluster_config, data_dir)):
-                dataset_module = importlib.import_module(dataset)
-        else:
-            dataset = dataset.replace(".", "/")
-            dataset_module = _get_dataset_module_from_cluster(cluster_config, f"{data_dir}/{dataset}/__init__.py")
-            is_on_cluster = True
-    return dataset_module, data_path, is_on_cluster
+        return dataset_module, data_path, False
+
+    data_path = data_dir
+    if cluster_config["executor"] == "local":
+        with add_to_path(get_unmounted_path(cluster_config, data_dir)):
+            dataset_module = importlib.import_module(dataset)
+        return dataset_module, data_path, False
+
+    dataset = dataset.replace(".", "/")
+    dataset_module = _get_dataset_module_from_cluster(cluster_config, f"{data_dir}/{dataset}/__init__.py")
+    return dataset_module, data_path, True
 
 
 def get_dataset_module(dataset, data_dir=None, cluster_config=None, extra_datasets=None, extra_datasets_type=None):
     """Load dataset module with optional cluster support.
 
     This is the cluster-aware version of nemo_skills.dataset.utils.get_dataset_module.
-    If cluster_config is None or executor is "none", delegates to the core version.
+    If cluster_config is None or executor is "none", delegates entirely to core.
 
     Search priority:
-    1. data_dir (or `nemo_skills.dataset` if None) folder
+    1. data_dir (or ``nemo_skills.dataset`` if None) folder
     2. extra_datasets parameter if defined
-    3. `NEMO_SKILLS_EXTRA_DATASETS` environment variable
+    3. ``NEMO_SKILLS_EXTRA_DATASETS`` environment variable
     """
-    try:
-        dataset_module, data_path, is_on_cluster = _get_default_dataset_module(
-            dataset, data_dir, cluster_config, extra_datasets, extra_datasets_type
+    if cluster_config is None or cluster_config["executor"] in (None, "none"):
+        # No cluster — delegate entirely to core
+        return _get_local_dataset_module(
+            dataset, data_dir, extra_datasets=extra_datasets, extra_datasets_type=extra_datasets_type
         )
+
+    # Cluster path: try primary location first
+    try:
+        return _get_cluster_dataset_module(dataset, data_dir, cluster_config)
     except ModuleNotFoundError:
-        try:
-            dataset = dataset.replace(".", "/")
-            extra_datasets = extra_datasets or os.environ.get("NEMO_SKILLS_EXTRA_DATASETS")
-            is_on_cluster = False
-            data_path = extra_datasets
-            if extra_datasets is None:
-                raise RuntimeError(f"Dataset {dataset} not found in {data_dir if data_dir else 'nemo_skills.dataset'}")
-            if extra_datasets_type == ExtraDatasetType.local or extra_datasets_type is None:
-                with add_to_path(extra_datasets):
-                    dataset_module = importlib.import_module(dataset)
-            else:
-                dataset_module = _get_dataset_module_from_cluster(
-                    cluster_config, f"{extra_datasets}/{dataset}/__init__.py"
-                )
-                is_on_cluster = True
-        except ModuleNotFoundError:
-            raise RuntimeError(
-                f"Dataset {dataset} not found in any of the searched locations: "
-                f"{data_dir if data_dir else 'nemo_skills.dataset'}, {extra_datasets}"
-            )
-    return dataset_module, data_path, is_on_cluster
+        pass
+
+    # Fallback to extra_datasets
+    extra_datasets = extra_datasets or os.environ.get("NEMO_SKILLS_EXTRA_DATASETS")
+    if extra_datasets is None:
+        raise RuntimeError(f"Dataset {dataset} not found in {data_dir if data_dir else 'nemo_skills.dataset'}")
+
+    if extra_datasets_type == ExtraDatasetType.cluster:
+        # Cluster extra_datasets: download from remote
+        dataset = dataset.replace(".", "/")
+        dataset_module = _get_dataset_module_from_cluster(cluster_config, f"{extra_datasets}/{dataset}/__init__.py")
+        return dataset_module, extra_datasets, True
+
+    # Local extra_datasets: delegate to core (handles add_to_path + import)
+    try:
+        return _get_local_dataset_module(dataset, data_dir=extra_datasets)
+    except RuntimeError:
+        raise RuntimeError(
+            f"Dataset {dataset} not found in any of the searched locations: "
+            f"{data_dir if data_dir else 'nemo_skills.dataset'}, {extra_datasets}"
+        )

From fdcfdb11d41dc7bbe681116b0d9721df698d771c Mon Sep 17 00:00:00 2001
From: George Armstrong <georgea@nvidia.com>
Date: Tue, 17 Feb 2026 12:46:04 -0800
Subject: [PATCH 10/25] Remove section comments from core/requirements.txt

The section labels (agent runtime, math evaluation, code evaluation,
benchmark evaluator deps) were misleading since many deps span
multiple categories. Keep it as a flat alphabetical list.

Signed-off-by: George Armstrong <georgea@nvidia.com>
---
 core/requirements.txt | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/core/requirements.txt b/core/requirements.txt
index 170ae0acca..da1efe8af7 100644
--- a/core/requirements.txt
+++ b/core/requirements.txt
@@ -3,11 +3,7 @@
 # NOTE: benchmark-specific deps are included here because JIT install is not yet implemented.
 # Once JIT install is ready, benchmark deps can be moved to per-benchmark extras.
 
-
-# --- benchmark evaluator deps ---
 bs4
-
-# --- code evaluation ---
 compute-eval @ git+https://github.com/NVIDIA/compute-eval.git@2d14770
 datasets
 editdistance
@@ -17,7 +13,6 @@ fire
 flask
 func-timeout
 gradio
-# --- agent runtime ---
 httpx
 huggingface_hub
 hydra-core
@@ -26,8 +21,6 @@ iso639-lang
 langcodes
 language-data
 litellm[caching]
-
-# --- math evaluation ---
 math-verify[antlr4_9_3]
 mcp
 numpy

From 49aed5e45ebb5f0c6d67809cf58ac1187d9bf958 Mon Sep 17 00:00:00 2001
From: George Armstrong <georgea@nvidia.com>
Date: Tue, 17 Feb 2026 12:47:47 -0800
Subject: [PATCH 11/25] Remove summarize-results note from CONTRIBUTING.md

Signed-off-by: George Armstrong <georgea@nvidia.com>
---
 CONTRIBUTING.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index eeb3eb45e3..68e733de3b 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -96,8 +96,6 @@ All benchmark-specific dependencies (e.g., `faiss-cpu`, `sacrebleu`, `datasets`,
 - If you have a function that works locally but *also* needs a cluster variant, put the local version in core and a cluster-aware wrapper in `nemo_skills/pipeline/` (see `pipeline/dataset.py` for the pattern). The pipeline wrapper should **only** handle cluster I/O (SSH downloads, mount resolution), then delegate to core for all local logic.
 - If you absolutely must reference pipeline code from core for backwards compatibility, use a lazy import inside a function body with a `DeprecationWarning` (see `dataset/utils.py:get_dataset_module` for the pattern). Never add a top-level import.
 
-> **Note:** `summarize-results` currently straddles core and pipeline (it downloads files from the cluster and processes them locally). A clean separation is tracked in [issue #779](https://github.com/NVIDIA-NeMo/Skills/issues/779#issuecomment-3344395623).
-
 ### Keep the code elegant
 When adding new features, try to keep the code simple and elegant.
 - Can you reuse / extend an existing functionality?

From d1c4195991d7e8e7f2d548361c35eece7abbb3f0 Mon Sep 17 00:00:00 2001
From: George Armstrong <georgea@nvidia.com>
Date: Tue, 17 Feb 2026 12:54:09 -0800
Subject: [PATCH 12/25] Properly eliminate duplicated import logic from
 pipeline/dataset.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pipeline no longer calls importlib.import_module or add_to_path
directly — all import/module-resolution logic lives in core.

Pipeline's only responsibilities are now:
- Local executor: unmount paths via get_unmounted_path, then
  delegate to core, then map returned paths back to mounted form
- Remote executor: SSH download via cluster_download_file for
  custom data_dir or cluster-type extra_datasets

Addresses review comment #7 on PR #1229.

Signed-off-by: George Armstrong <georgea@nvidia.com>
---
 nemo_skills/pipeline/dataset.py | 112 +++++++++++++++++++-------------
 1 file changed, 67 insertions(+), 45 deletions(-)

diff --git a/nemo_skills/pipeline/dataset.py b/nemo_skills/pipeline/dataset.py
index 0702136b57..cddf6437e5 100644
--- a/nemo_skills/pipeline/dataset.py
+++ b/nemo_skills/pipeline/dataset.py
@@ -14,21 +14,22 @@
 
 """Cluster-aware dataset loading.
 
-This module adds cluster support (SSH downloads, mount path resolution)
-on top of nemo_skills.dataset.utils. The pipeline layer ONLY handles
-cluster I/O — all local import/resolution logic lives in core.
+This module adds two capabilities on top of nemo_skills.dataset.utils:
+1. Mount-path resolution for local executors (get_unmounted_path)
+2. SSH download for remote executors (cluster_download_file)
+
+All import/module-resolution logic lives in core. Pipeline never calls
+importlib.import_module or add_to_path directly.
 
 For local-only dataset loading, use nemo_skills.dataset.utils directly.
 """
 
-import importlib
 import os
 import tempfile
 from pathlib import Path
 
 from nemo_skills.dataset.utils import (
     ExtraDatasetType,
-    add_to_path,
     import_from_path,
 )
 from nemo_skills.dataset.utils import (
@@ -37,8 +38,8 @@
 from nemo_skills.pipeline.utils import cluster_download_file, get_unmounted_path
 
 
-def _get_dataset_module_from_cluster(cluster_config, mounted_path):
-    """Download and import a dataset module from a remote cluster."""
+def _download_and_import(cluster_config, mounted_path):
+    """Download a dataset module from a remote cluster via SSH and import it."""
     with tempfile.TemporaryDirectory() as tmpdir:
         tmp_path = str(Path(tmpdir) / "init.py")
         cluster_dataset_path = get_unmounted_path(cluster_config, mounted_path)
@@ -52,50 +53,71 @@ def _get_dataset_module_from_cluster(cluster_config, mounted_path):
         return import_from_path(tmp_path)
 
 
-def _get_cluster_dataset_module(dataset, data_dir, cluster_config):
-    """Load dataset module when running on a cluster.
-
-    Handles cluster-specific cases: local executor with mount paths,
-    and remote executor with SSH downloads.
-    """
-    if data_dir is None:
-        data_path = "/nemo_run/code/nemo_skills/dataset"
-        dataset_module = importlib.import_module(f"nemo_skills.dataset.{dataset}")
-        return dataset_module, data_path, False
-
-    data_path = data_dir
-    if cluster_config["executor"] == "local":
-        with add_to_path(get_unmounted_path(cluster_config, data_dir)):
-            dataset_module = importlib.import_module(dataset)
-        return dataset_module, data_path, False
-
-    dataset = dataset.replace(".", "/")
-    dataset_module = _get_dataset_module_from_cluster(cluster_config, f"{data_dir}/{dataset}/__init__.py")
-    return dataset_module, data_path, True
-
-
 def get_dataset_module(dataset, data_dir=None, cluster_config=None, extra_datasets=None, extra_datasets_type=None):
     """Load dataset module with optional cluster support.
 
     This is the cluster-aware version of nemo_skills.dataset.utils.get_dataset_module.
-    If cluster_config is None or executor is "none", delegates entirely to core.
 
-    Search priority:
-    1. data_dir (or ``nemo_skills.dataset`` if None) folder
-    2. extra_datasets parameter if defined
-    3. ``NEMO_SKILLS_EXTRA_DATASETS`` environment variable
+    - No cluster / none executor: delegates entirely to core.
+    - Local executor: unmounts paths, then delegates to core.
+    - Remote executor: downloads from cluster via SSH when needed.
     """
     if cluster_config is None or cluster_config["executor"] in (None, "none"):
-        # No cluster — delegate entirely to core
         return _get_local_dataset_module(
             dataset, data_dir, extra_datasets=extra_datasets, extra_datasets_type=extra_datasets_type
         )
 
-    # Cluster path: try primary location first
-    try:
-        return _get_cluster_dataset_module(dataset, data_dir, cluster_config)
-    except ModuleNotFoundError:
-        pass
+    if cluster_config["executor"] == "local":
+        return _get_local_executor_dataset(dataset, data_dir, cluster_config, extra_datasets, extra_datasets_type)
+
+    return _get_remote_executor_dataset(dataset, data_dir, cluster_config, extra_datasets, extra_datasets_type)
+
+
+def _get_local_executor_dataset(dataset, data_dir, cluster_config, extra_datasets, extra_datasets_type):
+    """Local executor: unmount paths, then delegate entirely to core."""
+    local_data_dir = get_unmounted_path(cluster_config, data_dir) if data_dir else None
+    local_extra = get_unmounted_path(cluster_config, extra_datasets) if extra_datasets else None
+
+    dataset_module, resolved_path, _ = _get_local_dataset_module(
+        dataset,
+        local_data_dir,
+        extra_datasets=local_extra,
+        extra_datasets_type=extra_datasets_type,
+    )
+
+    # Core returns the unmounted path — map it back to the mounted path
+    # so pipeline callers get paths valid inside the container.
+    if local_data_dir and resolved_path == local_data_dir:
+        return dataset_module, data_dir, False
+    if local_extra and resolved_path == local_extra:
+        return dataset_module, extra_datasets, False
+    return dataset_module, resolved_path, False
+
+
+def _get_remote_executor_dataset(dataset, data_dir, cluster_config, extra_datasets, extra_datasets_type):
+    """Remote executor: download from cluster via SSH when needed.
+
+    Standard built-in datasets (data_dir=None) are available locally
+    even on a remote executor since they ship with the code package.
+    Only custom data_dir / cluster extra_datasets need SSH download.
+    """
+    # Try primary location
+    if data_dir is not None:
+        # Custom data_dir on remote cluster — must download
+        try:
+            dataset_name = dataset.replace(".", "/")
+            dataset_module = _download_and_import(cluster_config, f"{data_dir}/{dataset_name}/__init__.py")
+            return dataset_module, data_dir, True
+        except RuntimeError:
+            pass
+    else:
+        # Standard dataset — import locally via core (no cluster-type extra_datasets here,
+        # that's handled in the fallback below)
+        local_extra = extra_datasets if extra_datasets_type != ExtraDatasetType.cluster else None
+        try:
+            return _get_local_dataset_module(dataset, extra_datasets=local_extra)
+        except RuntimeError:
+            pass
 
     # Fallback to extra_datasets
     extra_datasets = extra_datasets or os.environ.get("NEMO_SKILLS_EXTRA_DATASETS")
@@ -103,14 +125,14 @@ def get_dataset_module(dataset, data_dir=None, cluster_config=None, extra_datase
         raise RuntimeError(f"Dataset {dataset} not found in {data_dir if data_dir else 'nemo_skills.dataset'}")
 
     if extra_datasets_type == ExtraDatasetType.cluster:
-        # Cluster extra_datasets: download from remote
-        dataset = dataset.replace(".", "/")
-        dataset_module = _get_dataset_module_from_cluster(cluster_config, f"{extra_datasets}/{dataset}/__init__.py")
+        dataset_name = dataset.replace(".", "/")
+        dataset_module = _download_and_import(cluster_config, f"{extra_datasets}/{dataset_name}/__init__.py")
         return dataset_module, extra_datasets, True
 
-    # Local extra_datasets: delegate to core (handles add_to_path + import)
+    # Local extra_datasets on remote executor — delegate to core
     try:
-        return _get_local_dataset_module(dataset, data_dir=extra_datasets)
+        dataset_module, _, _ = _get_local_dataset_module(dataset, data_dir=extra_datasets)
+        return dataset_module, extra_datasets, False
     except RuntimeError:
         raise RuntimeError(
             f"Dataset {dataset} not found in any of the searched locations: "

From b7e258ff225f8d2186775740f4fa2b774763358e Mon Sep 17 00:00:00 2001
From: George Armstrong <georgea@nvidia.com>
Date: Tue, 17 Feb 2026 13:01:31 -0800
Subject: [PATCH 13/25] Simplify pipeline/dataset.py to a single thin function

Collapse 3 helper functions into one download helper + one main
function. Pipeline only does two things: unmount paths (local
executor) and SSH download (remote executor). All import logic
delegates to core.

140 -> 78 lines.

Signed-off-by: George Armstrong <georgea@nvidia.com>
---
 nemo_skills/pipeline/dataset.py | 125 +++++++++-----------------------
 1 file changed, 34 insertions(+), 91 deletions(-)

diff --git a/nemo_skills/pipeline/dataset.py b/nemo_skills/pipeline/dataset.py
index cddf6437e5..9d12ad8c39 100644
--- a/nemo_skills/pipeline/dataset.py
+++ b/nemo_skills/pipeline/dataset.py
@@ -14,127 +14,70 @@
 
 """Cluster-aware dataset loading.
 
-This module adds two capabilities on top of nemo_skills.dataset.utils:
-1. Mount-path resolution for local executors (get_unmounted_path)
-2. SSH download for remote executors (cluster_download_file)
-
-All import/module-resolution logic lives in core. Pipeline never calls
-importlib.import_module or add_to_path directly.
-
-For local-only dataset loading, use nemo_skills.dataset.utils directly.
+Adds mount-path resolution and SSH download on top of core's
+nemo_skills.dataset.utils. All import logic lives in core.
 """
 
 import os
 import tempfile
 from pathlib import Path
 
-from nemo_skills.dataset.utils import (
-    ExtraDatasetType,
-    import_from_path,
-)
-from nemo_skills.dataset.utils import (
-    get_dataset_module as _get_local_dataset_module,
-)
+from nemo_skills.dataset.utils import ExtraDatasetType, import_from_path
+from nemo_skills.dataset.utils import get_dataset_module as _get_local_dataset_module
 from nemo_skills.pipeline.utils import cluster_download_file, get_unmounted_path
 
 
-def _download_and_import(cluster_config, mounted_path):
-    """Download a dataset module from a remote cluster via SSH and import it."""
+def _download_and_import(cluster_config, base_path, dataset):
+    """SSH-download a dataset __init__.py from the cluster and import it."""
+    dataset_path = dataset.replace(".", "/")
+    mounted_init = f"{base_path}/{dataset_path}/__init__.py"
     with tempfile.TemporaryDirectory() as tmpdir:
         tmp_path = str(Path(tmpdir) / "init.py")
-        cluster_dataset_path = get_unmounted_path(cluster_config, mounted_path)
         try:
-            cluster_download_file(cluster_config, cluster_dataset_path, tmp_path)
+            cluster_download_file(cluster_config, get_unmounted_path(cluster_config, mounted_init), tmp_path)
         except FileNotFoundError:
             raise RuntimeError(
-                f"Init file {mounted_path} not found on the cluster. "
-                f"Please check the dataset name you're using. Did you forget to run prepare data commands?"
+                f"Dataset {dataset} not found at {mounted_init} on the cluster. "
+                "Did you forget to run prepare data commands?"
             )
         return import_from_path(tmp_path)
 
 
 def get_dataset_module(dataset, data_dir=None, cluster_config=None, extra_datasets=None, extra_datasets_type=None):
-    """Load dataset module with optional cluster support.
-
-    This is the cluster-aware version of nemo_skills.dataset.utils.get_dataset_module.
-
-    - No cluster / none executor: delegates entirely to core.
-    - Local executor: unmounts paths, then delegates to core.
-    - Remote executor: downloads from cluster via SSH when needed.
-    """
+    """Cluster-aware dataset loading. Delegates to core for all import logic."""
+    # No cluster — pass through to core
     if cluster_config is None or cluster_config["executor"] in (None, "none"):
         return _get_local_dataset_module(
             dataset, data_dir, extra_datasets=extra_datasets, extra_datasets_type=extra_datasets_type
         )
 
-    if cluster_config["executor"] == "local":
-        return _get_local_executor_dataset(dataset, data_dir, cluster_config, extra_datasets, extra_datasets_type)
+    is_remote = cluster_config["executor"] not in ("local", None, "none")
+    extra = extra_datasets or os.environ.get("NEMO_SKILLS_EXTRA_DATASETS")
 
-    return _get_remote_executor_dataset(dataset, data_dir, cluster_config, extra_datasets, extra_datasets_type)
+    # Remote + paths on the cluster: SSH download
+    if is_remote and data_dir:
+        return _download_and_import(cluster_config, data_dir, dataset), data_dir, True
 
+    if is_remote and extra_datasets_type == ExtraDatasetType.cluster and extra:
+        try:
+            return _get_local_dataset_module(dataset)
+        except RuntimeError:
+            return _download_and_import(cluster_config, extra, dataset), extra, True
 
-def _get_local_executor_dataset(dataset, data_dir, cluster_config, extra_datasets, extra_datasets_type):
-    """Local executor: unmount paths, then delegate entirely to core."""
-    local_data_dir = get_unmounted_path(cluster_config, data_dir) if data_dir else None
-    local_extra = get_unmounted_path(cluster_config, extra_datasets) if extra_datasets else None
+    # Remote + local paths: delegate to core as-is
+    if is_remote:
+        return _get_local_dataset_module(
+            dataset, data_dir, extra_datasets=extra, extra_datasets_type=extra_datasets_type
+        )
 
+    # Local executor: unmount paths, delegate to core, remap returned path
+    local_dir = get_unmounted_path(cluster_config, data_dir) if data_dir else None
+    local_extra = get_unmounted_path(cluster_config, extra) if extra else None
     dataset_module, resolved_path, _ = _get_local_dataset_module(
         dataset,
-        local_data_dir,
+        local_dir,
         extra_datasets=local_extra,
         extra_datasets_type=extra_datasets_type,
     )
-
-    # Core returns the unmounted path — map it back to the mounted path
-    # so pipeline callers get paths valid inside the container.
-    if local_data_dir and resolved_path == local_data_dir:
-        return dataset_module, data_dir, False
-    if local_extra and resolved_path == local_extra:
-        return dataset_module, extra_datasets, False
-    return dataset_module, resolved_path, False
-
-
-def _get_remote_executor_dataset(dataset, data_dir, cluster_config, extra_datasets, extra_datasets_type):
-    """Remote executor: download from cluster via SSH when needed.
-
-    Standard built-in datasets (data_dir=None) are available locally
-    even on a remote executor since they ship with the code package.
-    Only custom data_dir / cluster extra_datasets need SSH download.
-    """
-    # Try primary location
-    if data_dir is not None:
-        # Custom data_dir on remote cluster — must download
-        try:
-            dataset_name = dataset.replace(".", "/")
-            dataset_module = _download_and_import(cluster_config, f"{data_dir}/{dataset_name}/__init__.py")
-            return dataset_module, data_dir, True
-        except RuntimeError:
-            pass
-    else:
-        # Standard dataset — import locally via core (no cluster-type extra_datasets here,
-        # that's handled in the fallback below)
-        local_extra = extra_datasets if extra_datasets_type != ExtraDatasetType.cluster else None
-        try:
-            return _get_local_dataset_module(dataset, extra_datasets=local_extra)
-        except RuntimeError:
-            pass
-
-    # Fallback to extra_datasets
-    extra_datasets = extra_datasets or os.environ.get("NEMO_SKILLS_EXTRA_DATASETS")
-    if extra_datasets is None:
-        raise RuntimeError(f"Dataset {dataset} not found in {data_dir if data_dir else 'nemo_skills.dataset'}")
-
-    if extra_datasets_type == ExtraDatasetType.cluster:
-        dataset_name = dataset.replace(".", "/")
-        dataset_module = _download_and_import(cluster_config, f"{extra_datasets}/{dataset_name}/__init__.py")
-        return dataset_module, extra_datasets, True
-
-    # Local extra_datasets on remote executor — delegate to core
-    try:
-        dataset_module, _, _ = _get_local_dataset_module(dataset, data_dir=extra_datasets)
-        return dataset_module, extra_datasets, False
-    except RuntimeError:
-        raise RuntimeError(
-            f"Dataset {dataset} not found in any of the searched locations: "
-            f"{data_dir if data_dir else 'nemo_skills.dataset'}, {extra_datasets}"
-        )
+    mounted = {local_dir: data_dir, local_extra: extra}.get(resolved_path, resolved_path)
+    return dataset_module, mounted, False

From c5d12cf71db1c8e0b5647b17a3a7295edc08f7c2 Mon Sep 17 00:00:00 2001
From: George Armstrong <georgea@nvidia.com>
Date: Tue, 17 Feb 2026 13:22:04 -0800
Subject: [PATCH 14/25] Move wandb from pipeline to core

wandb is used by summarize-results which is core functionality,
not just pipeline/orchestration. Move it to core/requirements.txt.

Signed-off-by: George Armstrong <georgea@nvidia.com>
---
 CONTRIBUTING.md           | 4 ++--
 core/requirements.txt     | 1 +
 requirements/pipeline.txt | 1 -
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 68e733de3b..70441531e1 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -72,7 +72,7 @@ There is no separate `main.txt` — `pyproject.toml` composes the default instal
 **Boundary definition:**
 
 - **Core** = everything needed to run inference + evaluation locally (including all benchmark evaluator deps)
-- **Pipeline** = orchestration-only deps (`nemo_run`, `typer`, `wandb`, `click`, `nemo-evaluator-launcher`)
+- **Pipeline** = orchestration-only deps (`nemo_run`, `typer`, `click`, `nemo-evaluator-launcher`)
 
 All benchmark-specific dependencies (e.g., `faiss-cpu`, `sacrebleu`, `datasets`, `func-timeout`) go in `core/requirements.txt`. Eventually these should migrate to JIT (just-in-time) install so that benchmark deps are installed on demand at runtime, but until that is implemented, they must be in core so evaluators do not crash at runtime.
 
@@ -83,7 +83,7 @@ All benchmark-specific dependencies (e.g., `faiss-cpu`, `sacrebleu`, `datasets`,
 - `sacrebleu` -> `core/requirements.txt` (used by translation benchmark evaluator)
 - `faiss-cpu` -> `core/requirements.txt` (used by BFCL benchmark evaluator)
 - `nemo_run` -> `requirements/pipeline.txt` (cluster job orchestration)
-- `wandb` -> `requirements/pipeline.txt` (experiment tracking for cluster jobs)
+- `wandb` -> `core/requirements.txt` (used by summarize-results)
 
 **Examples of mistakes to avoid:**
 
diff --git a/core/requirements.txt b/core/requirements.txt
index da1efe8af7..33840ca802 100644
--- a/core/requirements.txt
+++ b/core/requirements.txt
@@ -36,3 +36,4 @@ serpapi
 sympy
 tqdm
 transformers
+wandb
diff --git a/requirements/pipeline.txt b/requirements/pipeline.txt
index eb839a91da..f9af5acf8a 100644
--- a/requirements/pipeline.txt
+++ b/requirements/pipeline.txt
@@ -5,4 +5,3 @@ click < 8.2.0  # https://github.com/ai-dynamo/dynamo/issues/1039
 nemo-evaluator-launcher<0.1.47
 nemo_run @ git+https://github.com/NVIDIA-NeMo/Run
 typer >= 0.13
-wandb

From 3b025ecf3e7872ec412aedd90aeb24b573234a89 Mon Sep 17 00:00:00 2001
From: George Armstrong <georgea@nvidia.com>
Date: Tue, 17 Feb 2026 13:34:42 -0800
Subject: [PATCH 15/25] Fix Dockerfile.nemo-skills to use core + pipeline
 requirements

The Dockerfile referenced the deleted requirements/main.txt.
Update to install from core/requirements.txt + pipeline.txt,
matching how pyproject.toml now composes dependencies.

Signed-off-by: George Armstrong <georgea@nvidia.com>
---
 dockerfiles/Dockerfile.nemo-skills | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/dockerfiles/Dockerfile.nemo-skills b/dockerfiles/Dockerfile.nemo-skills
index 6300512339..7455867c9b 100644
--- a/dockerfiles/Dockerfile.nemo-skills
+++ b/dockerfiles/Dockerfile.nemo-skills
@@ -54,12 +54,13 @@ RUN pip install langdetect absl-py immutabledict nltk ipython && \
 
 # we aren't copying main nemo_skills folder as it will always be mounted from host
 # but we do want to install all requirements in the container directly
-RUN mkdir -p /opt/NeMo-Skills/requirements
+RUN mkdir -p /opt/NeMo-Skills/requirements /opt/NeMo-Skills/core
 COPY pyproject.toml README.md /opt/NeMo-Skills/
 COPY requirements /opt/NeMo-Skills/requirements/
+COPY core/requirements.txt /opt/NeMo-Skills/core/requirements.txt
 # installing sdp in container only
 RUN pip install git+https://github.com/NVIDIA/NeMo-speech-data-processor@29b9b1ec0ceaf3ffa441c1d01297371b3f8e11d2
 ARG CACHEBUST=3
-RUN pip install --no-cache-dir -r /opt/NeMo-Skills/requirements/main.txt
+RUN pip install --no-cache-dir -r /opt/NeMo-Skills/core/requirements.txt -r /opt/NeMo-Skills/requirements/pipeline.txt
 # Fix http mismatch between lepton and dggs by manually downloading dggs here
 RUN pip install ddgs

From 836160f568b962494b574ac9e0dc49fdee37ee8c Mon Sep 17 00:00:00 2001
From: George Armstrong <georgea@nvidia.com>
Date: Tue, 17 Feb 2026 16:56:47 -0800
Subject: [PATCH 16/25] fix: use nemo_run import guard instead of package
 metadata

The Docker container installs deps from requirements files without
running `pip install .`, so package metadata is not available.
Checking for nemo_run import instead correctly detects whether
pipeline deps are installed.

Signed-off-by: George Armstrong <georgea@nvidia.com>
---
 nemo_skills/pipeline/__init__.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/nemo_skills/pipeline/__init__.py b/nemo_skills/pipeline/__init__.py
index a0960f5a2b..13aad30063 100644
--- a/nemo_skills/pipeline/__init__.py
+++ b/nemo_skills/pipeline/__init__.py
@@ -12,11 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from importlib.metadata import PackageNotFoundError, metadata
-
 try:
-    metadata("nemo-skills")
-except PackageNotFoundError:
+    import nemo_run  # noqa: F401
+except ModuleNotFoundError:
     raise ImportError(
         "nemo-skills-core is installed but pipeline functionality requires the full package. "
         "Install it with: pip install nemo-skills"

From cf4b94cfa1aa0294680c9c960191049a949d2712 Mon Sep 17 00:00:00 2001
From: George Armstrong <georgea@nvidia.com>
Date: Thu, 19 Feb 2026 14:37:14 -0800
Subject: [PATCH 17/25] fix: remove huggingface_hub<1 pin from BFCL
 requirements

The BFCL eval venv uses --system-site-packages and pins
huggingface_hub<1, which downgrades the system's huggingface_hub 1.x
to 0.x. This breaks transformers (from system packages) which needs
is_offline_mode only available in huggingface_hub>=1.0.

Gorilla's own BFCL does not pin huggingface_hub, so removing the
constraint is safe.

Signed-off-by: George Armstrong <georgea@nvidia.com>
---
 nemo_skills/inference/eval/bfcl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nemo_skills/inference/eval/bfcl.py b/nemo_skills/inference/eval/bfcl.py
index c168d24699..966106627b 100644
--- a/nemo_skills/inference/eval/bfcl.py
+++ b/nemo_skills/inference/eval/bfcl.py
@@ -54,7 +54,7 @@
     "tqdm",
     "numpy==1.26.4",
     "pandas",
-    "huggingface_hub<1",  # pin to <1 to satisfy a requirement from another package
+    "huggingface_hub",
     "pydantic>=2.8.2",
     "python-dotenv>=1.0.1",
     "tree_sitter==0.21.3",

From 6255b1a72d0fb9df5ff182e96d40dbd73c44bf4c Mon Sep 17 00:00:00 2001
From: George Armstrong <georgea@nvidia.com>
Date: Thu, 19 Feb 2026 17:22:48 -0800
Subject: [PATCH 18/25] address feedback from code review

Signed-off-by: George Armstrong <georgea@nvidia.com>
---
 .github/workflows/tests.yml        |  2 --
 CONTRIBUTING.md                    | 46 ++++--------------------------
 docs/basics/installation.md        | 27 ++----------------
 requirements/pipeline.txt          |  2 +-
 tests/test_dependency_isolation.py | 31 +++++++++++++-------
 5 files changed, 29 insertions(+), 79 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index bb5249da03..837ca2d58b 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -41,8 +41,6 @@ jobs:
       with:
         python-version: "3.10"
         cache: pip
-    - name: Install uv
-      uses: astral-sh/setup-uv@v4
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 70441531e1..db815420ee 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -51,50 +51,14 @@ The following things are required when adding new benchmarks
 
 ### Respect the Core / Pipeline dependency boundary
 
-NeMo Skills is split into **Core** (agent runtime) and **Pipeline** (orchestration). The rule is simple:
+NeMo Skills is split into **Core** (inference, evaluation, tools, benchmarks) and **Pipeline** (CLI, cluster orchestration). The one-way rule:
 
-```
-Pipeline can import from Core.
-Core CANNOT import from Pipeline.
-```
-
-Core modules are everything under `nemo_skills/` **except** `nemo_skills/pipeline/`. They must never have top-level imports from `nemo_skills.pipeline` or `nemo_run`. This boundary is enforced by `tests/test_dependency_isolation.py` which verifies that core modules import successfully without pipeline dependencies installed.
-
-**When adding a new dependency**, put it in the right requirements file:
-
-| If the dependency is needed for... | Add it to |
-|---|---|
-| Inference, evaluation, tool calling, any benchmark evaluator | `core/requirements.txt` |
-| CLI commands (`ns`), cluster orchestration, experiment tracking | `requirements/pipeline.txt` |
-
-There is no separate `main.txt` — `pyproject.toml` composes the default install from `core/requirements.txt` + `requirements/pipeline.txt`. Each dependency lives in exactly one file.
-
-**Boundary definition:**
-
-- **Core** = everything needed to run inference + evaluation locally (including all benchmark evaluator deps)
-- **Pipeline** = orchestration-only deps (`nemo_run`, `typer`, `click`, `nemo-evaluator-launcher`)
-
-All benchmark-specific dependencies (e.g., `faiss-cpu`, `sacrebleu`, `datasets`, `func-timeout`) go in `core/requirements.txt`. Eventually these should migrate to JIT (just-in-time) install so that benchmark deps are installed on demand at runtime, but until that is implemented, they must be in core so evaluators do not crash at runtime.
-
-**Examples of correct placement:**
-
-- `httpx` -> `core/requirements.txt` (used by model inference clients)
-- `sympy` -> `core/requirements.txt` (used by math graders)
-- `sacrebleu` -> `core/requirements.txt` (used by translation benchmark evaluator)
-- `faiss-cpu` -> `core/requirements.txt` (used by BFCL benchmark evaluator)
-- `nemo_run` -> `requirements/pipeline.txt` (cluster job orchestration)
-- `wandb` -> `core/requirements.txt` (used by summarize-results)
-
-**Examples of mistakes to avoid:**
-
-- Adding `nemo_run` to `core/requirements.txt` -- it is a pipeline/orchestration dependency, core must not depend on it.
-- Adding `typer` to `core/requirements.txt` -- it is the CLI framework, only used by the pipeline layer.
+- **Pipeline** can import from **Core**
+- **Core** CANNOT import from **Pipeline** (no `nemo_run`, no `nemo_skills.pipeline`)
 
-**When writing new core code:**
+When adding dependencies: inference/evaluation/benchmark deps go in `core/requirements.txt`, orchestration deps go in `requirements/pipeline.txt`. This boundary is enforced by `tests/test_dependency_isolation.py`.
 
-- If you need something from `nemo_skills.pipeline`, your code probably belongs in pipeline, not core. Move it.
-- If you have a function that works locally but *also* needs a cluster variant, put the local version in core and a cluster-aware wrapper in `nemo_skills/pipeline/` (see `pipeline/dataset.py` for the pattern). The pipeline wrapper should **only** handle cluster I/O (SSH downloads, mount resolution), then delegate to core for all local logic.
-- If you absolutely must reference pipeline code from core for backwards compatibility, use a lazy import inside a function body with a `DeprecationWarning` (see `dataset/utils.py:get_dataset_module` for the pattern). Never add a top-level import.
+For full details (examples, common patterns, what to avoid), see [Dependency Boundary Guide](docs/core-pipeline-boundary.md).
 
 ### Keep the code elegant
 When adding new features, try to keep the code simple and elegant.
diff --git a/docs/basics/installation.md b/docs/basics/installation.md
index 6d20e93684..a04d9602ba 100644
--- a/docs/basics/installation.md
+++ b/docs/basics/installation.md
@@ -47,28 +47,7 @@ pip install -e core/
 pip install -e ".[dev]"
 ```
 
-## Core / Pipeline architecture boundary
+## Core / Pipeline boundary
 
-The codebase enforces a one-way dependency rule: pipeline modules can import
-from core, but core modules must not import from pipeline.
-
-This boundary is enforced by `tests/test_dependency_isolation.py` which creates
-fresh virtualenvs and verifies that core modules import successfully without
-pipeline dependencies installed.
-
-### Dataset loading example
-
-The boundary shows up concretely in dataset loading:
-
-```python
-# Core: local-only dataset loading (no cluster deps)
-from nemo_skills.dataset.utils import get_dataset_module
-module, path, on_cluster = get_dataset_module("gsm8k")
-
-# Pipeline: cluster-aware dataset loading (SSH tunnels, mount resolution)
-from nemo_skills.pipeline.dataset import get_dataset_module
-module, path, on_cluster = get_dataset_module("gsm8k", cluster_config=cfg)
-```
-
-If you pass `cluster_config` to the core version, it will emit a
-`DeprecationWarning` and redirect to the pipeline version.
+For contributors: the codebase enforces a one-way dependency rule between core
+and pipeline modules. See the [Dependency Boundary Guide](../core-pipeline-boundary.md) for details.
diff --git a/requirements/pipeline.txt b/requirements/pipeline.txt
index f9af5acf8a..8cc278c7ed 100644
--- a/requirements/pipeline.txt
+++ b/requirements/pipeline.txt
@@ -1,5 +1,5 @@
 # Pipeline/orchestration dependencies (CLI, cluster management, experiment tracking).
-# These are additional to core.txt.
+# These are additional to core/requirements.txt.
 
 click < 8.2.0  # https://github.com/ai-dynamo/dynamo/issues/1039
 nemo-evaluator-launcher<0.1.47
diff --git a/tests/test_dependency_isolation.py b/tests/test_dependency_isolation.py
index 928f28b1ee..c32355c6e6 100644
--- a/tests/test_dependency_isolation.py
+++ b/tests/test_dependency_isolation.py
@@ -22,22 +22,31 @@
 nemo_run is blocked via a sys.modules override.
 """
 
+import pathlib
 import subprocess
 import sys
 
 import pytest
 
-# Core modules that must be importable without nemo_run / pipeline
-CORE_MODULES = [
-    "nemo_skills.inference.generate",
-    "nemo_skills.inference.model",
-    "nemo_skills.evaluation.evaluator",
-    "nemo_skills.evaluation.math_grader",
-    "nemo_skills.dataset.utils",
-    "nemo_skills.mcp.tool_manager",
-    "nemo_skills.code_execution.sandbox",
-    "nemo_skills.utils",
-]
+
+def _discover_core_modules():
+    """Find all importable nemo_skills subpackages except pipeline."""
+    import nemo_skills
+
+    root = pathlib.Path(nemo_skills.__file__).parent
+    modules = []
+    for init in sorted(root.rglob("__init__.py")):
+        rel = init.parent.relative_to(root)
+        parts = rel.parts
+        if not parts:  # root __init__.py
+            continue
+        if parts[0] == "pipeline" or "__pycache__" in parts:
+            continue
+        modules.append("nemo_skills." + ".".join(parts))
+    return modules
+
+
+CORE_MODULES = _discover_core_modules()
 
 
 @pytest.mark.parametrize("module_name", CORE_MODULES)

From b76b8bdd50557dc41d70a7e7dd12b2adb3a65bb8 Mon Sep 17 00:00:00 2001
From: George Armstrong <georgea@nvidia.com>
Date: Thu, 19 Feb 2026 17:24:46 -0800
Subject: [PATCH 19/25] add dependency boundary guide and requirements symlink

Move detailed core/pipeline boundary docs from CONTRIBUTING.md and
installation.md into docs/core-pipeline-boundary.md. Add symlink at
requirements/core.txt pointing to core/requirements.txt for discoverability.

Signed-off-by: George Armstrong <georgea@nvidia.com>
---
 docs/core-pipeline-boundary.md | 64 ++++++++++++++++++++++++++++++++++
 requirements/core.txt          |  1 +
 2 files changed, 65 insertions(+)
 create mode 100644 docs/core-pipeline-boundary.md
 create mode 120000 requirements/core.txt

diff --git a/docs/core-pipeline-boundary.md b/docs/core-pipeline-boundary.md
new file mode 100644
index 0000000000..8d1f7cf20c
--- /dev/null
+++ b/docs/core-pipeline-boundary.md
@@ -0,0 +1,64 @@
+# Core / Pipeline Dependency Boundary
+
+NeMo Skills is split into **Core** (agent runtime) and **Pipeline** (orchestration). The rule is simple:
+
+```
+Pipeline can import from Core.
+Core CANNOT import from Pipeline.
+```
+
+Core modules are everything under `nemo_skills/` **except** `nemo_skills/pipeline/`. They must never have top-level imports from `nemo_skills.pipeline` or `nemo_run`. This boundary is enforced by `tests/test_dependency_isolation.py` which verifies that core modules import successfully when `nemo_run` is blocked.
+
+## Dependency placement
+
+When adding a new dependency, put it in the right requirements file:
+
+| If the dependency is needed for... | Add it to |
+|---|---|
+| Inference, evaluation, tool calling, any benchmark evaluator | `core/requirements.txt` |
+| CLI commands (`ns`), cluster orchestration, experiment tracking | `requirements/pipeline.txt` |
+
+There is no separate `main.txt` — `pyproject.toml` composes the default install from `core/requirements.txt` + `requirements/pipeline.txt`. Each dependency lives in exactly one file.
+
+**Boundary definition:**
+
+- **Core** = everything needed to run inference + evaluation locally (including all benchmark evaluator deps)
+- **Pipeline** = orchestration-only deps (`nemo_run`, `typer`, `click`, `nemo-evaluator-launcher`)
+
+All benchmark-specific dependencies (e.g., `faiss-cpu`, `sacrebleu`, `datasets`, `func-timeout`) go in `core/requirements.txt`. Eventually these should migrate to JIT (just-in-time) install so that benchmark deps are installed on demand at runtime, but until that is implemented, they must be in core so evaluators do not crash at runtime.
+
+## Examples of correct placement
+
+- `httpx` -> `core/requirements.txt` (used by model inference clients)
+- `sympy` -> `core/requirements.txt` (used by math graders)
+- `sacrebleu` -> `core/requirements.txt` (used by translation benchmark evaluator)
+- `faiss-cpu` -> `core/requirements.txt` (used by BFCL benchmark evaluator)
+- `nemo_run` -> `requirements/pipeline.txt` (cluster job orchestration)
+- `wandb` -> `core/requirements.txt` (used by summarize-results)
+
+## Examples of mistakes to avoid
+
+- Adding `nemo_run` to `core/requirements.txt` -- it is a pipeline/orchestration dependency, core must not depend on it.
+- Adding `typer` to `core/requirements.txt` -- it is the CLI framework, only used by the pipeline layer.
+
+## Writing new core code
+
+- If you need something from `nemo_skills.pipeline`, your code probably belongs in pipeline, not core. Move it.
+- If you have a function that works locally but *also* needs a cluster variant, put the local version in core and a cluster-aware wrapper in `nemo_skills/pipeline/` (see `pipeline/dataset.py` for the pattern). The pipeline wrapper should **only** handle cluster I/O (SSH downloads, mount resolution), then delegate to core for all local logic.
+- If you absolutely must reference pipeline code from core for backwards compatibility, use a lazy import inside a function body with a `DeprecationWarning` (see `dataset/utils.py:get_dataset_module` for the pattern). Never add a top-level import.
+
+## Dataset loading example
+
+The boundary shows up concretely in dataset loading:
+
+```python
+# Core: local-only dataset loading (no cluster deps)
+from nemo_skills.dataset.utils import get_dataset_module
+module, path, on_cluster = get_dataset_module("gsm8k")
+
+# Pipeline: cluster-aware dataset loading (SSH tunnels, mount resolution)
+from nemo_skills.pipeline.dataset import get_dataset_module
+module, path, on_cluster = get_dataset_module("gsm8k", cluster_config=cfg)
+```
+
+If you pass `cluster_config` to the core version, it will emit a `DeprecationWarning` and redirect to the pipeline version.
diff --git a/requirements/core.txt b/requirements/core.txt
new file mode 120000
index 0000000000..f8cfc00bc7
--- /dev/null
+++ b/requirements/core.txt
@@ -0,0 +1 @@
+../core/requirements.txt
\ No newline at end of file

From 305cde768f1f86ea24be21f1ab58b2165a9356f8 Mon Sep 17 00:00:00 2001
From: George Armstrong <georgea@nvidia.com>
Date: Thu, 19 Feb 2026 17:52:34 -0800
Subject: [PATCH 20/25] maint: add back core dependency guidelines and adjust
 links

Signed-off-by: George Armstrong <georgea@nvidia.com>
---
 CONTRIBUTING.md                                  |  2 +-
 docs/core-pipeline-boundary.md => core/README.md | 13 ++++++-------
 docs/basics/installation.md                      |  2 +-
 3 files changed, 8 insertions(+), 9 deletions(-)
 rename docs/core-pipeline-boundary.md => core/README.md (75%)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index db815420ee..b4588c8675 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -58,7 +58,7 @@ NeMo Skills is split into **Core** (inference, evaluation, tools, benchmarks) an
 
 When adding dependencies: inference/evaluation/benchmark deps go in `core/requirements.txt`, orchestration deps go in `requirements/pipeline.txt`. This boundary is enforced by `tests/test_dependency_isolation.py`.
 
-For full details (examples, common patterns, what to avoid), see [Dependency Boundary Guide](docs/core-pipeline-boundary.md).
+For full details (examples, common patterns, what to avoid), see [Dependency Boundary Guide](core/README.md).
 
 ### Keep the code elegant
 When adding new features, try to keep the code simple and elegant.
diff --git a/docs/core-pipeline-boundary.md b/core/README.md
similarity index 75%
rename from docs/core-pipeline-boundary.md
rename to core/README.md
index 8d1f7cf20c..b71a4eca99 100644
--- a/docs/core-pipeline-boundary.md
+++ b/core/README.md
@@ -44,8 +44,8 @@ All benchmark-specific dependencies (e.g., `faiss-cpu`, `sacrebleu`, `datasets`,
 ## Writing new core code
 
 - If you need something from `nemo_skills.pipeline`, your code probably belongs in pipeline, not core. Move it.
-- If you have a function that works locally but *also* needs a cluster variant, put the local version in core and a cluster-aware wrapper in `nemo_skills/pipeline/` (see `pipeline/dataset.py` for the pattern). The pipeline wrapper should **only** handle cluster I/O (SSH downloads, mount resolution), then delegate to core for all local logic.
-- If you absolutely must reference pipeline code from core for backwards compatibility, use a lazy import inside a function body with a `DeprecationWarning` (see `dataset/utils.py:get_dataset_module` for the pattern). Never add a top-level import.
+- If you have a function that works locally but *also* needs a cluster variant, keep both paths in the same function but use a **lazy import** for the pipeline code inside the branch that needs it (see `dataset/utils.py:get_dataset_module` for the pattern). Never add a top-level import.
+- The pipeline layer (`nemo_skills/pipeline/`) can provide thin wrappers or re-exports for convenience (see `pipeline/dataset.py`), but all local logic should live in core.
 
 ## Dataset loading example
 
@@ -54,11 +54,10 @@ The boundary shows up concretely in dataset loading:
 ```python
 # Core: local-only dataset loading (no cluster deps)
 from nemo_skills.dataset.utils import get_dataset_module
-module, path, on_cluster = get_dataset_module("gsm8k")
+module, data_path = get_dataset_module("gsm8k")
 
-# Pipeline: cluster-aware dataset loading (SSH tunnels, mount resolution)
-from nemo_skills.pipeline.dataset import get_dataset_module
-module, path, on_cluster = get_dataset_module("gsm8k", cluster_config=cfg)
+# Cluster-aware: same function, lazy pipeline imports inside
+module, data_path = get_dataset_module("gsm8k", cluster_config=cfg)
 ```
 
-If you pass `cluster_config` to the core version, it will emit a `DeprecationWarning` and redirect to the pipeline version.
+When `cluster_config` is provided, the function lazily imports from `nemo_skills.pipeline.utils` inside the code path that needs it — the top-level module stays free of pipeline dependencies.
diff --git a/docs/basics/installation.md b/docs/basics/installation.md
index a04d9602ba..34c7496d17 100644
--- a/docs/basics/installation.md
+++ b/docs/basics/installation.md
@@ -50,4 +50,4 @@ pip install -e ".[dev]"
 ## Core / Pipeline boundary
 
 For contributors: the codebase enforces a one-way dependency rule between core
-and pipeline modules. See the [Dependency Boundary Guide](../core-pipeline-boundary.md) for details.
+and pipeline modules. See the [Dependency Boundary Guide](https://github.com/NVIDIA-NeMo/Skills/blob/main/core/README.md) for details.

From 28de0482f3e34c28d02387df559d1247a44029a1 Mon Sep 17 00:00:00 2001
From: George Armstrong <georgea@nvidia.com>
Date: Thu, 19 Feb 2026 18:08:21 -0800
Subject: [PATCH 21/25] fix dataset resolution logic after merge

Signed-off-by: George Armstrong <georgea@nvidia.com>
---
 core/README.md                     |  5 ++-
 nemo_skills/dataset/utils.py       | 41 ++++---------------
 nemo_skills/pipeline/dataset.py    | 63 +++++++++++++++++++++++++++---
 nemo_skills/pipeline/utils/eval.py |  3 +-
 4 files changed, 71 insertions(+), 41 deletions(-)

diff --git a/core/README.md b/core/README.md
index b71a4eca99..e789c384cb 100644
--- a/core/README.md
+++ b/core/README.md
@@ -56,8 +56,9 @@ The boundary shows up concretely in dataset loading:
 from nemo_skills.dataset.utils import get_dataset_module
 module, data_path = get_dataset_module("gsm8k")
 
-# Cluster-aware: same function, lazy pipeline imports inside
+# Pipeline: cluster-aware wrapper (SSH downloads, mount resolution)
+from nemo_skills.pipeline.dataset import get_dataset_module
 module, data_path = get_dataset_module("gsm8k", cluster_config=cfg)
 ```
 
-When `cluster_config` is provided, the function lazily imports from `nemo_skills.pipeline.utils` inside the code path that needs it — the top-level module stays free of pipeline dependencies.
+The core version has zero pipeline imports. The pipeline wrapper delegates to core for local resolution and only adds cluster-specific logic (mount-path unmounting, SSH file downloads) when needed.
diff --git a/nemo_skills/dataset/utils.py b/nemo_skills/dataset/utils.py
index 4b86641469..8074832558 100644
--- a/nemo_skills/dataset/utils.py
+++ b/nemo_skills/dataset/utils.py
@@ -17,7 +17,6 @@
 import json
 import os
 import sys
-import tempfile
 import time
 import urllib.request
 from pathlib import Path
@@ -91,22 +90,6 @@ def add_to_path(p):
         sys.path = old_path
 
 
-def _get_dataset_module_from_cluster(cluster_config, mounted_path):
-    from nemo_skills.pipeline.utils import cluster_download_file, get_unmounted_path
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmp_path = str(Path(tmpdir) / "init.py")
-        cluster_dataset_path = get_unmounted_path(cluster_config, mounted_path)
-        try:
-            cluster_download_file(cluster_config, cluster_dataset_path, tmp_path)
-        except FileNotFoundError:
-            raise RuntimeError(
-                f"Init file {mounted_path} not found on the cluster. "
-                f"Please check the dataset name you're using. Did you forget to run prepare data commands?"
-            )
-        return import_from_path(tmp_path)
-
-
 def get_dataset_name(dataset):
     """Extract the canonical dataset name from a dataset identifier (short name or path)."""
     if "/" in dataset:
@@ -182,7 +165,7 @@ def get_default_dataset_module(dataset):
     return dataset_module, data_path
 
 
-def get_dataset_module(dataset, data_dir=None, cluster_config=None, extra_benchmark_map=None):
+def get_dataset_module(dataset, data_dir=None, extra_benchmark_map=None):
     """Get dataset module from nemo_skills.dataset, extra benchmark map, or a directory path.
 
     Resolution order:
@@ -192,7 +175,10 @@ def get_dataset_module(dataset, data_dir=None, cluster_config=None, extra_benchm
        - If found in exactly one, use it.
        - If found in neither, fall back to data_dir if provided.
     3. If data_dir is provided and previous resolution failed, try to load the module
-       from data_dir (locally or by downloading from cluster).
+       from data_dir locally.
+
+    For cluster-aware loading (SSH downloads, mount resolution), use
+    nemo_skills.pipeline.dataset.get_dataset_module instead.
 
     Args:
         extra_benchmark_map: Either a dict mapping short names to directory paths,
@@ -225,21 +211,10 @@ def get_dataset_module(dataset, data_dir=None, cluster_config=None, extra_benchm
     if found_builtin:
         return dataset_module, data_path
 
-    # Fall back to data_dir if provided
+    # Fall back to data_dir if provided (local only)
     if data_dir:
-        from nemo_skills.pipeline.utils import get_unmounted_path
-
-        dataset_as_path = dataset.replace(".", "/")
-        if cluster_config is None or cluster_config["executor"] == "none":
-            with add_to_path(data_dir):
-                dataset_module = importlib.import_module(dataset)
-        elif cluster_config["executor"] == "local":
-            with add_to_path(get_unmounted_path(cluster_config, data_dir)):
-                dataset_module = importlib.import_module(dataset)
-        else:
-            dataset_module = _get_dataset_module_from_cluster(
-                cluster_config, f"{data_dir}/{dataset_as_path}/__init__.py"
-            )
+        with add_to_path(data_dir):
+            dataset_module = importlib.import_module(dataset)
         return dataset_module, data_dir
 
     map_path = (
diff --git a/nemo_skills/pipeline/dataset.py b/nemo_skills/pipeline/dataset.py
index 3eb62d323c..1278102905 100644
--- a/nemo_skills/pipeline/dataset.py
+++ b/nemo_skills/pipeline/dataset.py
@@ -14,10 +14,63 @@
 
 """Cluster-aware dataset loading.
 
-Re-exports get_dataset_module from core. The cluster-aware functionality
-(mount-path resolution, SSH downloads) is handled by core's get_dataset_module
-when a cluster_config is provided — pipeline-only deps are lazily imported
-inside those code paths.
+Wraps core's get_dataset_module with cluster support (mount-path resolution,
+SSH downloads). Pipeline code that needs cluster_config should import from here
+instead of nemo_skills.dataset.utils.
 """
 
-from nemo_skills.dataset.utils import get_dataset_module  # noqa: F401
+import importlib
+import tempfile
+from pathlib import Path
+
+from nemo_skills.dataset.utils import (
+    add_to_path,
+    import_from_path,
+)
+from nemo_skills.dataset.utils import (
+    get_dataset_module as _core_get_dataset_module,
+)
+from nemo_skills.pipeline.utils import cluster_download_file, get_unmounted_path
+
+
+def _get_dataset_module_from_cluster(cluster_config, mounted_path):
+    with tempfile.TemporaryDirectory() as tmpdir:
+        tmp_path = str(Path(tmpdir) / "init.py")
+        cluster_dataset_path = get_unmounted_path(cluster_config, mounted_path)
+        try:
+            cluster_download_file(cluster_config, cluster_dataset_path, tmp_path)
+        except FileNotFoundError:
+            raise RuntimeError(
+                f"Init file {mounted_path} not found on the cluster. "
+                f"Please check the dataset name you're using. Did you forget to run prepare data commands?"
+            )
+        return import_from_path(tmp_path)
+
+
+def get_dataset_module(dataset, data_dir=None, cluster_config=None, extra_benchmark_map=None):
+    """Cluster-aware dataset module loading.
+
+    Delegates to core's get_dataset_module for local resolution (path-based,
+    built-in, extra benchmark map). When a cluster_config is provided and the
+    dataset requires data_dir fallback, handles mount-path resolution and
+    SSH downloads.
+    """
+    # No cluster config → delegate entirely to core
+    if cluster_config is None or cluster_config.get("executor") == "none":
+        return _core_get_dataset_module(dataset, data_dir=data_dir, extra_benchmark_map=extra_benchmark_map)
+
+    # Try core resolution first (without data_dir — path, built-in, extra map)
+    try:
+        return _core_get_dataset_module(dataset, extra_benchmark_map=extra_benchmark_map)
+    except RuntimeError:
+        if not data_dir:
+            raise
+
+    # Core couldn't resolve it; use data_dir with cluster awareness
+    dataset_as_path = dataset.replace(".", "/")
+    if cluster_config["executor"] == "local":
+        with add_to_path(get_unmounted_path(cluster_config, data_dir)):
+            dataset_module = importlib.import_module(dataset)
+    else:
+        dataset_module = _get_dataset_module_from_cluster(cluster_config, f"{data_dir}/{dataset_as_path}/__init__.py")
+    return dataset_module, data_dir
diff --git a/nemo_skills/pipeline/utils/eval.py b/nemo_skills/pipeline/utils/eval.py
index e0ecedbe56..ff1ab345fe 100644
--- a/nemo_skills/pipeline/utils/eval.py
+++ b/nemo_skills/pipeline/utils/eval.py
@@ -21,9 +21,10 @@
 from typing import Dict
 
 import nemo_skills.pipeline.utils as pipeline_utils
-from nemo_skills.dataset.utils import get_dataset_module, get_dataset_name, import_from_path
+from nemo_skills.dataset.utils import get_dataset_name, import_from_path
 from nemo_skills.inference import GENERATION_MODULE_MAP
 from nemo_skills.inference.generate import GenerationTask
+from nemo_skills.pipeline.dataset import get_dataset_module
 from nemo_skills.utils import compute_chunk_ids, get_logger_name
 
 LOG = logging.getLogger(get_logger_name(__file__))

From ded4a2c09e485a6f54475aeaf81bfe8679db4a70 Mon Sep 17 00:00:00 2001
From: George Armstrong <georgea@nvidia.com>
Date: Thu, 19 Feb 2026 18:12:33 -0800
Subject: [PATCH 22/25] remove boundary arch mention completely

Signed-off-by: George Armstrong <georgea@nvidia.com>
---
 docs/basics/installation.md | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/docs/basics/installation.md b/docs/basics/installation.md
index 34c7496d17..3fccbf8222 100644
--- a/docs/basics/installation.md
+++ b/docs/basics/installation.md
@@ -46,8 +46,3 @@ pip install -e core/
 # Development (everything + dev tools)
 pip install -e ".[dev]"
 ```
-
-## Core / Pipeline boundary
-
-For contributors: the codebase enforces a one-way dependency rule between core
-and pipeline modules. See the [Dependency Boundary Guide](https://github.com/NVIDIA-NeMo/Skills/blob/main/core/README.md) for details.

From d1731217e79e591f033f6a41b53059e39fcc16dc Mon Sep 17 00:00:00 2001
From: George Armstrong <georgea@nvidia.com>
Date: Thu, 19 Feb 2026 18:40:46 -0800
Subject: [PATCH 23/25] fix: use importlib for hyphenated module names in
 isolation test

Directories with hyphens (e.g., answer-judge, math-500, llama3-instruct)
cannot be imported via `import` statement. Use importlib.import_module()
which handles arbitrary module names correctly.

Signed-off-by: George Armstrong <georgea@nvidia.com>
---
 tests/test_dependency_isolation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_dependency_isolation.py b/tests/test_dependency_isolation.py
index c32355c6e6..baabcc9df9 100644
--- a/tests/test_dependency_isolation.py
+++ b/tests/test_dependency_isolation.py
@@ -53,12 +53,12 @@ def _discover_core_modules():
 def test_core_module_imports_without_nemo_run(module_name):
     """Each core module must import successfully when nemo_run is unavailable."""
     script = (
-        "import sys; "
+        "import sys, importlib; "
         # Block nemo_run from being importable
         "sys.modules['nemo_run'] = None; "
         "sys.modules['nemo_skills.pipeline'] = None; "
         "sys.modules['nemo_skills.pipeline.utils'] = None; "
-        f"import {module_name}; "
+        f"importlib.import_module('{module_name}'); "
         "print('OK')"
     )
     result = subprocess.run(

From 97bac874df848ddbc6acb5edfe691e9d28cc9e3a Mon Sep 17 00:00:00 2001
From: George Armstrong <georgea@nvidia.com>
Date: Tue, 24 Feb 2026 10:57:32 -0800
Subject: [PATCH 24/25] fix: add torchcodec to core requirements for numb3rs
 dataset

Signed-off-by: George Armstrong <georgea@nvidia.com>
---
 core/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/core/requirements.txt b/core/requirements.txt
index 7189055e6b..4648f38081 100644
--- a/core/requirements.txt
+++ b/core/requirements.txt
@@ -37,6 +37,7 @@ scikit-learn
 sentence_transformers
 serpapi
 sympy
+torchcodec
 tqdm
 transformers
 wandb

From cc4118c5d7b7cc61f543bbcf8eb5b90a95f11da5 Mon Sep 17 00:00:00 2001
From: George Armstrong <georgea@nvidia.com>
Date: Tue, 24 Feb 2026 12:13:38 -0800
Subject: [PATCH 25/25] bump CACHEBUST to rebuild container with torchcodec

Signed-off-by: George Armstrong <georgea@nvidia.com>
---
 dockerfiles/Dockerfile.nemo-skills | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dockerfiles/Dockerfile.nemo-skills b/dockerfiles/Dockerfile.nemo-skills
index 7464521a4f..736ef2dec7 100644
--- a/dockerfiles/Dockerfile.nemo-skills
+++ b/dockerfiles/Dockerfile.nemo-skills
@@ -61,7 +61,7 @@ COPY requirements /opt/NeMo-Skills/requirements/
 COPY core/requirements.txt /opt/NeMo-Skills/core/requirements.txt
 # installing sdp in container only
 RUN pip install git+https://github.com/NVIDIA/NeMo-speech-data-processor@29b9b1ec0ceaf3ffa441c1d01297371b3f8e11d2
-ARG CACHEBUST=3
+ARG CACHEBUST=4
 RUN pip install --no-cache-dir -r /opt/NeMo-Skills/core/requirements.txt -r /opt/NeMo-Skills/requirements/pipeline.txt
 # Fix http mismatch between lepton and dggs by manually downloading dggs here
 RUN pip install ddgs