diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4dba695e4c..b4588c8675 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -49,6 +49,17 @@ The following things are required when adding new benchmarks the dataset into slurm tests. This is the most comprehensive test we can do by running full evaluation on cluster with arbitrary model and check that results are as expected. +### Respect the Core / Pipeline dependency boundary + +NeMo Skills is split into **Core** (inference, evaluation, tools, benchmarks) and **Pipeline** (CLI, cluster orchestration). The one-way rule: + +- **Pipeline** can import from **Core** +- **Core** CANNOT import from **Pipeline** (no `nemo_run`, no `nemo_skills.pipeline`) + +When adding dependencies: inference/evaluation/benchmark deps go in `core/requirements.txt`, orchestration deps go in `requirements/pipeline.txt`. This boundary is enforced by `tests/test_dependency_isolation.py`. + +For full details (examples, common patterns, what to avoid), see [Dependency Boundary Guide](core/README.md). + ### Keep the code elegant When adding new features, try to keep the code simple and elegant. - Can you reuse / extend an existing functionality? diff --git a/core/README.md b/core/README.md new file mode 100644 index 0000000000..e789c384cb --- /dev/null +++ b/core/README.md @@ -0,0 +1,64 @@ +# Core / Pipeline Dependency Boundary + +NeMo Skills is split into **Core** (agent runtime) and **Pipeline** (orchestration). The rule is simple: + +``` +Pipeline can import from Core. +Core CANNOT import from Pipeline. +``` + +Core modules are everything under `nemo_skills/` **except** `nemo_skills/pipeline/`. They must never have top-level imports from `nemo_skills.pipeline` or `nemo_run`. This boundary is enforced by `tests/test_dependency_isolation.py` which verifies that core modules import successfully when `nemo_run` is blocked. 
+ +## Dependency placement + +When adding a new dependency, put it in the right requirements file: + +| If the dependency is needed for... | Add it to | +|---|---| +| Inference, evaluation, tool calling, any benchmark evaluator | `core/requirements.txt` | +| CLI commands (`ns`), cluster orchestration, experiment tracking | `requirements/pipeline.txt` | + +There is no separate `main.txt` — `pyproject.toml` composes the default install from `core/requirements.txt` + `requirements/pipeline.txt`. Each dependency lives in exactly one file. + +**Boundary definition:** + +- **Core** = everything needed to run inference + evaluation locally (including all benchmark evaluator deps) +- **Pipeline** = orchestration-only deps (`nemo_run`, `typer`, `click`, `nemo-evaluator-launcher`) + +All benchmark-specific dependencies (e.g., `faiss-cpu`, `sacrebleu`, `datasets`, `func-timeout`) go in `core/requirements.txt`. Eventually these should migrate to JIT (just-in-time) install so that benchmark deps are installed on demand at runtime, but until that is implemented, they must be in core so evaluators do not crash at runtime. + +## Examples of correct placement + +- `httpx` -> `core/requirements.txt` (used by model inference clients) +- `sympy` -> `core/requirements.txt` (used by math graders) +- `sacrebleu` -> `core/requirements.txt` (used by translation benchmark evaluator) +- `faiss-cpu` -> `core/requirements.txt` (used by BFCL benchmark evaluator) +- `nemo_run` -> `requirements/pipeline.txt` (cluster job orchestration) +- `wandb` -> `core/requirements.txt` (used by summarize-results) + +## Examples of mistakes to avoid + +- Adding `nemo_run` to `core/requirements.txt` -- it is a pipeline/orchestration dependency, core must not depend on it. +- Adding `typer` to `core/requirements.txt` -- it is the CLI framework, only used by the pipeline layer. + +## Writing new core code + +- If you need something from `nemo_skills.pipeline`, your code probably belongs in pipeline, not core. 
Move it. +- If you have a function that works locally but *also* needs a cluster variant, keep both paths in the same function but use a **lazy import** for the pipeline code inside the branch that needs it (see `dataset/utils.py:get_dataset_module` for the pattern). Never add a top-level import. +- The pipeline layer (`nemo_skills/pipeline/`) can provide thin wrappers or re-exports for convenience (see `pipeline/dataset.py`), but all local logic should live in core. + +## Dataset loading example + +The boundary shows up concretely in dataset loading: + +```python +# Core: local-only dataset loading (no cluster deps) +from nemo_skills.dataset.utils import get_dataset_module +module, data_path = get_dataset_module("gsm8k") + +# Pipeline: cluster-aware wrapper (SSH downloads, mount resolution) +from nemo_skills.pipeline.dataset import get_dataset_module +module, data_path = get_dataset_module("gsm8k", cluster_config=cfg) +``` + +The core version has zero pipeline imports. The pipeline wrapper delegates to core for local resolution and only adds cluster-specific logic (mount-path unmounting, SSH file downloads) when needed. diff --git a/core/pyproject.toml b/core/pyproject.toml new file mode 100644 index 0000000000..c5ccb3bd1f --- /dev/null +++ b/core/pyproject.toml @@ -0,0 +1,53 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +[build-system] +requires = [ + "setuptools", + "wheel" +] +build-backend = "setuptools.build_meta" + +[project] +dynamic = ["version", "dependencies"] + +name = "nemo-skills-core" +description = "NeMo Skills core runtime -- inference, evaluation, and tool calling" +readme = {text = "NeMo Skills core runtime for inference, evaluation, and tool calling. See https://nvidia-nemo.github.io/Skills for full documentation.", content-type = "text/plain"} +classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", +] +requires-python = ">=3.10" + +[project.urls] +homepage = "https://nvidia-nemo.github.io/Skills" +source = "https://github.com/NVIDIA-NeMo/Skills" +issues = "https://github.com/NVIDIA-NeMo/Skills/issues" + +[project.scripts] +ns = "nemo_skills._cli_stub:main" + +[tool.setuptools] +include-package-data = true + +[tool.setuptools.packages.find] +where = [".."] +exclude = ["tests", "tests.*", "core", "core.*"] + +[tool.setuptools.dynamic] +version = { attr = "nemo_skills.version.__version__" } +dependencies = {file = ["requirements.txt"]} diff --git a/core/requirements.txt b/core/requirements.txt new file mode 100644 index 0000000000..4648f38081 --- /dev/null +++ b/core/requirements.txt @@ -0,0 +1,43 @@ +# Core dependencies for inference, evaluation, tool calling, and all benchmark evaluators. +# No cluster orchestration deps (nemo_run, typer, etc.) +# NOTE: benchmark-specific deps are included here because JIT install is not yet implemented. +# Once JIT install is ready, benchmark deps can be moved to per-benchmark extras. 
+ +bs4 +compute-eval @ git+https://github.com/NVIDIA/compute-eval.git@2d14770 +datasets +editdistance +evalplus @ git+https://github.com/evalplus/evalplus@c91370f +faiss-cpu +fire +flask +func-timeout +gradio +httpx +huggingface_hub +hydra-core +ipython +iso639-lang +langcodes +language-data +litellm[caching] +math-verify[antlr4_9_3] +mcp +numpy +openai +openpyxl>=3.1.0 +pandas>=2.0.0 +pyxlsb>=1.0.10 +pyyaml +rank_bm25 +requests +rich +sacrebleu +scikit-learn +sentence_transformers +serpapi +sympy +torchcodec +tqdm +transformers +wandb diff --git a/dockerfiles/Dockerfile.nemo-skills b/dockerfiles/Dockerfile.nemo-skills index ba40a21f5c..736ef2dec7 100644 --- a/dockerfiles/Dockerfile.nemo-skills +++ b/dockerfiles/Dockerfile.nemo-skills @@ -55,12 +55,13 @@ RUN pip install langdetect absl-py immutabledict nltk ipython && \ # we aren't copying main nemo_skills folder as it will always be mounted from host # but we do want to install all requirements in the container directly -RUN mkdir -p /opt/NeMo-Skills/requirements +RUN mkdir -p /opt/NeMo-Skills/requirements /opt/NeMo-Skills/core COPY pyproject.toml README.md /opt/NeMo-Skills/ COPY requirements /opt/NeMo-Skills/requirements/ +COPY core/requirements.txt /opt/NeMo-Skills/core/requirements.txt # installing sdp in container only RUN pip install git+https://github.com/NVIDIA/NeMo-speech-data-processor@29b9b1ec0ceaf3ffa441c1d01297371b3f8e11d2 -ARG CACHEBUST=3 -RUN pip install --no-cache-dir -r /opt/NeMo-Skills/requirements/main.txt +ARG CACHEBUST=4 +RUN pip install --no-cache-dir -r /opt/NeMo-Skills/core/requirements.txt -r /opt/NeMo-Skills/requirements/pipeline.txt # Fix http mismatch between lepton and dggs by manually downloading dggs here RUN pip install ddgs diff --git a/docs/basics/installation.md b/docs/basics/installation.md new file mode 100644 index 0000000000..3fccbf8222 --- /dev/null +++ b/docs/basics/installation.md @@ -0,0 +1,48 @@ +# Installation & Dependency Groups + +NeMo Skills provides two installable 
packages: + +- **`nemo-skills`** (root) -- full install with CLI, cluster orchestration, all benchmarks +- **`nemo-skills-core`** (`core/` subdirectory) -- lightweight runtime only + +## Default installation + +`pip install nemo-skills` gives you **everything** (inference, evaluation, CLI, +cluster orchestration, benchmarks): + +```bash +pip install git+https://github.com/NVIDIA-NeMo/Skills.git +# or, from a local clone: +pip install -e . +``` + +## Lightweight installation + +If you only need inference, evaluation, and tool calling (no cluster orchestration): + +```bash +pip install "nemo-skills-core @ git+https://github.com/NVIDIA-NeMo/Skills.git#subdirectory=core" +# or, from a local clone: +pip install -e core/ +``` + +## Extras (dependency groups) + +| Extra | Requirements file | What it provides | +|-------|-------------------|------------------| +| `core` | `core/requirements.txt` | Agent runtime: inference, evaluation, tool calling (MCP), prompt formatting, math/code grading. No cluster orchestration. | +| `pipeline` | `requirements/pipeline.txt` | CLI (`ns` command), cluster management, experiment tracking (`nemo_run`, `typer`). | +| `dev` | `requirements/common-tests.txt`, `requirements/common-dev.txt` | Development and testing tools (`pytest`, `ruff`, `pre-commit`). | + +### Examples + +```bash +# Full install (default) +pip install -e . 
+ +# Core only -- lightweight runtime for downstream integrations +pip install -e core/ + +# Development (everything + dev tools) +pip install -e ".[dev]" +``` diff --git a/mkdocs.yml b/mkdocs.yml index f90fca535d..a8d9ea8f58 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -56,6 +56,7 @@ nav: - Nemo-Skills: index.md - Getting started: - basics/index.md + - Installation & Dependencies: basics/installation.md - Cluster configs: basics/cluster-configs.md - Code packaging: basics/code-packaging.md - Prompt format: basics/prompt-format.md diff --git a/nemo_skills/_cli_stub.py b/nemo_skills/_cli_stub.py new file mode 100644 index 0000000000..833b3ff34e --- /dev/null +++ b/nemo_skills/_cli_stub.py @@ -0,0 +1,20 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys + + +def main(): + print("nemo-skills-core is installed (lightweight mode).\nFor the full ns CLI, run: pip install nemo-skills") + sys.exit(1) diff --git a/nemo_skills/dataset/utils.py b/nemo_skills/dataset/utils.py index 1bc65fcbbf..8074832558 100644 --- a/nemo_skills/dataset/utils.py +++ b/nemo_skills/dataset/utils.py @@ -17,7 +17,6 @@ import json import os import sys -import tempfile import time import urllib.request from pathlib import Path @@ -25,7 +24,6 @@ from urllib.error import URLError from nemo_skills.evaluation.math_grader import extract_answer -from nemo_skills.pipeline.utils import cluster_download_file, get_unmounted_path def locate(path): @@ -92,20 +90,6 @@ def add_to_path(p): sys.path = old_path -def _get_dataset_module_from_cluster(cluster_config, mounted_path): - with tempfile.TemporaryDirectory() as tmpdir: - tmp_path = str(Path(tmpdir) / "init.py") - cluster_dataset_path = get_unmounted_path(cluster_config, mounted_path) - try: - cluster_download_file(cluster_config, cluster_dataset_path, tmp_path) - except FileNotFoundError: - raise RuntimeError( - f"Init file {mounted_path} not found on the cluster. " - f"Please check the dataset name you're using. Did you forget to run prepare data commands?" - ) - return import_from_path(tmp_path) - - def get_dataset_name(dataset): """Extract the canonical dataset name from a dataset identifier (short name or path).""" if "/" in dataset: @@ -181,7 +165,7 @@ def get_default_dataset_module(dataset): return dataset_module, data_path -def get_dataset_module(dataset, data_dir=None, cluster_config=None, extra_benchmark_map=None): +def get_dataset_module(dataset, data_dir=None, extra_benchmark_map=None): """Get dataset module from nemo_skills.dataset, extra benchmark map, or a directory path. Resolution order: @@ -191,7 +175,10 @@ def get_dataset_module(dataset, data_dir=None, cluster_config=None, extra_benchm - If found in exactly one, use it. 
- If found in neither, fall back to data_dir if provided. 3. If data_dir is provided and previous resolution failed, try to load the module - from data_dir (locally or by downloading from cluster). + from data_dir locally. + + For cluster-aware loading (SSH downloads, mount resolution), use + nemo_skills.pipeline.dataset.get_dataset_module instead. Args: extra_benchmark_map: Either a dict mapping short names to directory paths, @@ -224,19 +211,10 @@ def get_dataset_module(dataset, data_dir=None, cluster_config=None, extra_benchm if found_builtin: return dataset_module, data_path - # Fall back to data_dir if provided + # Fall back to data_dir if provided (local only) if data_dir: - dataset_as_path = dataset.replace(".", "/") - if cluster_config is None or cluster_config["executor"] == "none": - with add_to_path(data_dir): - dataset_module = importlib.import_module(dataset) - elif cluster_config["executor"] == "local": - with add_to_path(get_unmounted_path(cluster_config, data_dir)): - dataset_module = importlib.import_module(dataset) - else: - dataset_module = _get_dataset_module_from_cluster( - cluster_config, f"{data_dir}/{dataset_as_path}/__init__.py" - ) + with add_to_path(data_dir): + dataset_module = importlib.import_module(dataset) return dataset_module, data_dir map_path = ( diff --git a/nemo_skills/evaluation/evaluator/__init__.py b/nemo_skills/evaluation/evaluator/__init__.py index 4ba170bc2e..6e1365e856 100644 --- a/nemo_skills/evaluation/evaluator/__init__.py +++ b/nemo_skills/evaluation/evaluator/__init__.py @@ -13,83 +13,90 @@ # limitations under the License. 
import asyncio +import importlib import inspect from typing import Any, Callable, Dict from nemo_skills.dataset.utils import locate -from nemo_skills.evaluation.evaluator.audio import AudioEvaluator from nemo_skills.evaluation.evaluator.base import BaseEvaluator -from nemo_skills.evaluation.evaluator.bfcl import eval_bfcl -from nemo_skills.evaluation.evaluator.bird import BirdEvaluator -from nemo_skills.evaluation.evaluator.code import ( - CodeExecEvaluator, - eval_bigcodebench, - eval_evalplus, - eval_human_eval_infilling, - eval_livebench_coding, - eval_livecodebench_pro, -) -from nemo_skills.evaluation.evaluator.compute_eval import ComputeEvalEvaluator -from nemo_skills.evaluation.evaluator.critpt import CritPtEvaluator -from nemo_skills.evaluation.evaluator.dsbench import DSBenchEvaluator -from nemo_skills.evaluation.evaluator.icpc import ICPCEvaluator -from nemo_skills.evaluation.evaluator.ifbench import eval_ifbench -from nemo_skills.evaluation.evaluator.ifeval import eval_if -from nemo_skills.evaluation.evaluator.ioi import IOIEvaluator -from nemo_skills.evaluation.evaluator.livecodebench import eval_livecodebench -from nemo_skills.evaluation.evaluator.math import ( - Lean4ProofEvaluator, - MathEvaluator, -) -from nemo_skills.evaluation.evaluator.mcq import eval_mcq -from nemo_skills.evaluation.evaluator.mmau_pro import eval_mmau_pro -from nemo_skills.evaluation.evaluator.mrcr import eval_mrcr -from nemo_skills.evaluation.evaluator.ruler import eval_ruler, eval_ruler2 -from nemo_skills.evaluation.evaluator.scicode import eval_scicode - -EVALUATOR_MAP = { - # Function-based evaluators (batch-only) - "evalplus": eval_evalplus, - "if": eval_if, - "ifbench": eval_ifbench, - "bfcl": eval_bfcl, - "multichoice": eval_mcq, - "ruler": eval_ruler, - "ruler2": eval_ruler2, - "livecodebench": eval_livecodebench, - "livebench_coding": eval_livebench_coding, - "livecodebench_pro": eval_livecodebench_pro, - "scicode": eval_scicode, - "mrcr": eval_mrcr, - "bigcodebench": 
eval_bigcodebench, - "human_eval_infilling": eval_human_eval_infilling, - "mmau-pro": eval_mmau_pro, + +# Lazy evaluator registry — stores dotted paths instead of eagerly importing +# every evaluator (which would pull in benchmark-specific deps like func_timeout, +# faiss, etc.). Actual imports happen on first use. + +# Function-based evaluators (batch-only): eval_type -> "module_path:function_name" +_EVALUATOR_MAP_PATHS = { + "evalplus": "nemo_skills.evaluation.evaluator.code:eval_evalplus", + "if": "nemo_skills.evaluation.evaluator.ifeval:eval_if", + "ifbench": "nemo_skills.evaluation.evaluator.ifbench:eval_ifbench", + "bfcl": "nemo_skills.evaluation.evaluator.bfcl:eval_bfcl", + "multichoice": "nemo_skills.evaluation.evaluator.mcq:eval_mcq", + "ruler": "nemo_skills.evaluation.evaluator.ruler:eval_ruler", + "ruler2": "nemo_skills.evaluation.evaluator.ruler:eval_ruler2", + "livecodebench": "nemo_skills.evaluation.evaluator.livecodebench:eval_livecodebench", + "livebench_coding": "nemo_skills.evaluation.evaluator.code:eval_livebench_coding", + "livecodebench_pro": "nemo_skills.evaluation.evaluator.code:eval_livecodebench_pro", + "scicode": "nemo_skills.evaluation.evaluator.scicode:eval_scicode", + "mrcr": "nemo_skills.evaluation.evaluator.mrcr:eval_mrcr", + "bigcodebench": "nemo_skills.evaluation.evaluator.code:eval_bigcodebench", + "human_eval_infilling": "nemo_skills.evaluation.evaluator.code:eval_human_eval_infilling", + "mmau-pro": "nemo_skills.evaluation.evaluator.mmau_pro:eval_mmau_pro", } -# Evaluator class mapping, other evaluators can be added here as they're converted to classes -EVALUATOR_CLASS_MAP = { - "math": MathEvaluator, - "lean4-proof": Lean4ProofEvaluator, - "code_exec": CodeExecEvaluator, - "ioi": IOIEvaluator, - "icpc": ICPCEvaluator, - "audio": AudioEvaluator, - "bird": BirdEvaluator, - "compute-eval": ComputeEvalEvaluator, - "critpt": CritPtEvaluator, - "dsbench": DSBenchEvaluator, +# Class-based evaluators: eval_type -> 
"module_path:ClassName" +_EVALUATOR_CLASS_MAP_PATHS = { + "math": "nemo_skills.evaluation.evaluator.math:MathEvaluator", + "lean4-proof": "nemo_skills.evaluation.evaluator.math:Lean4ProofEvaluator", + "code_exec": "nemo_skills.evaluation.evaluator.code:CodeExecEvaluator", + "ioi": "nemo_skills.evaluation.evaluator.ioi:IOIEvaluator", + "icpc": "nemo_skills.evaluation.evaluator.icpc:ICPCEvaluator", + "audio": "nemo_skills.evaluation.evaluator.audio:AudioEvaluator", + "bird": "nemo_skills.evaluation.evaluator.bird:BirdEvaluator", + "compute-eval": "nemo_skills.evaluation.evaluator.compute_eval:ComputeEvalEvaluator", + "critpt": "nemo_skills.evaluation.evaluator.critpt:CritPtEvaluator", + "dsbench": "nemo_skills.evaluation.evaluator.dsbench:DSBenchEvaluator", } # Validation: Ensure no overlap between class and function maps -_class_types = set(EVALUATOR_CLASS_MAP.keys()) -_function_types = set(EVALUATOR_MAP.keys()) -_overlap = _class_types.intersection(_function_types) +_overlap = set(_EVALUATOR_CLASS_MAP_PATHS.keys()).intersection(_EVALUATOR_MAP_PATHS.keys()) if _overlap: raise ValueError( f"Evaluator types cannot be in both EVALUATOR_CLASS_MAP and EVALUATOR_MAP: {_overlap}. " f"Each eval_type must be in exactly one map." 
) +# Caches for resolved imports +_resolved_evaluator_map: Dict[str, Callable] = {} +_resolved_class_map: Dict[str, type] = {} + + +def _resolve(dotted: str): + """Import 'module.path:AttributeName' and return the attribute.""" + module_path, attr_name = dotted.rsplit(":", 1) + module = importlib.import_module(module_path) + return getattr(module, attr_name) + + +def _get_evaluator_fn(eval_type: str) -> Callable: + if eval_type not in _resolved_evaluator_map: + _resolved_evaluator_map[eval_type] = _resolve(_EVALUATOR_MAP_PATHS[eval_type]) + return _resolved_evaluator_map[eval_type] + + +def _get_evaluator_cls(eval_type: str) -> type: + if eval_type not in _resolved_class_map: + _resolved_class_map[eval_type] = _resolve(_EVALUATOR_CLASS_MAP_PATHS[eval_type]) + return _resolved_class_map[eval_type] + + +# --- Public API (unchanged signatures) --- + +# Keep EVALUATOR_MAP and EVALUATOR_CLASS_MAP as lazy-resolving dicts for +# any code that iterates them (e.g. listing available types). +# Direct key access goes through the public functions below. +EVALUATOR_MAP = _EVALUATOR_MAP_PATHS +EVALUATOR_CLASS_MAP = _EVALUATOR_CLASS_MAP_PATHS + def _resolve_eval_type(eval_type: str): """Resolve eval_type to either a class or function. 
@@ -108,15 +115,15 @@ def _resolve_eval_type(eval_type: str): return obj, is_class if eval_type in EVALUATOR_CLASS_MAP: - return EVALUATOR_CLASS_MAP[eval_type], True + return _get_evaluator_cls(eval_type), True if eval_type in EVALUATOR_MAP: - return EVALUATOR_MAP[eval_type], False + return _get_evaluator_fn(eval_type), False return None, False def is_evaluator_registered(eval_type: str): """Check if evaluator is registered in either class or function map.""" - return eval_type in EVALUATOR_CLASS_MAP or eval_type in EVALUATOR_MAP + return eval_type in _EVALUATOR_CLASS_MAP_PATHS or eval_type in _EVALUATOR_MAP_PATHS def register_evaluator(eval_type: str, eval_fn: Callable[[Dict[str, Any]], None], ignore_if_registered: bool = False): @@ -125,17 +132,19 @@ def register_evaluator(eval_type: str, eval_fn: Callable[[Dict[str, Any]], None] return raise ValueError(f"Evaluator for {eval_type} already registered") - EVALUATOR_MAP[eval_type] = eval_fn + _EVALUATOR_MAP_PATHS[eval_type] = None + _resolved_evaluator_map[eval_type] = eval_fn def get_evaluator_class(eval_type: str, config: Dict[str, Any]) -> BaseEvaluator: """Get evaluator instance by type.""" obj, is_class = _resolve_eval_type(eval_type) if obj is None or not is_class: + all_types = sorted(list(EVALUATOR_CLASS_MAP.keys()) + list(EVALUATOR_MAP.keys())) raise ValueError( f"Evaluator class not found for type: {eval_type}.\n" f"Available types with class support: {list(EVALUATOR_CLASS_MAP.keys())}\n" - f"All supported types: {list(EVALUATOR_MAP.keys())}\n" + f"All supported types: {all_types}\n" f"Or use path format: module.path::ClassName or /path/to/file.py::ClassName" ) return obj(config) diff --git a/nemo_skills/pipeline/__init__.py b/nemo_skills/pipeline/__init__.py index 4fc25d0d3c..28cd25fbc0 100644 --- a/nemo_skills/pipeline/__init__.py +++ b/nemo_skills/pipeline/__init__.py @@ -11,3 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. + +try: + import nemo_run # noqa: F401 +except ModuleNotFoundError: + raise ImportError( + "nemo-skills-core is installed but pipeline functionality requires the full package. " + "Install it with: pip install nemo-skills" + ) from None diff --git a/nemo_skills/pipeline/dataset.py b/nemo_skills/pipeline/dataset.py new file mode 100644 index 0000000000..1278102905 --- /dev/null +++ b/nemo_skills/pipeline/dataset.py @@ -0,0 +1,76 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Cluster-aware dataset loading. + +Wraps core's get_dataset_module with cluster support (mount-path resolution, +SSH downloads). Pipeline code that needs cluster_config should import from here +instead of nemo_skills.dataset.utils. 
+""" + +import importlib +import tempfile +from pathlib import Path + +from nemo_skills.dataset.utils import ( + add_to_path, + import_from_path, +) +from nemo_skills.dataset.utils import ( + get_dataset_module as _core_get_dataset_module, +) +from nemo_skills.pipeline.utils import cluster_download_file, get_unmounted_path + + +def _get_dataset_module_from_cluster(cluster_config, mounted_path): + with tempfile.TemporaryDirectory() as tmpdir: + tmp_path = str(Path(tmpdir) / "init.py") + cluster_dataset_path = get_unmounted_path(cluster_config, mounted_path) + try: + cluster_download_file(cluster_config, cluster_dataset_path, tmp_path) + except FileNotFoundError: + raise RuntimeError( + f"Init file {mounted_path} not found on the cluster. " + f"Please check the dataset name you're using. Did you forget to run prepare data commands?" + ) + return import_from_path(tmp_path) + + +def get_dataset_module(dataset, data_dir=None, cluster_config=None, extra_benchmark_map=None): + """Cluster-aware dataset module loading. + + Delegates to core's get_dataset_module for local resolution (path-based, + built-in, extra benchmark map). When a cluster_config is provided and the + dataset requires data_dir fallback, handles mount-path resolution and + SSH downloads. 
+ """ + # No cluster config → delegate entirely to core + if cluster_config is None or cluster_config.get("executor") == "none": + return _core_get_dataset_module(dataset, data_dir=data_dir, extra_benchmark_map=extra_benchmark_map) + + # Try core resolution first (without data_dir — path, built-in, extra map) + try: + return _core_get_dataset_module(dataset, extra_benchmark_map=extra_benchmark_map) + except RuntimeError: + if not data_dir: + raise + + # Core couldn't resolve it; use data_dir with cluster awareness + dataset_as_path = dataset.replace(".", "/") + if cluster_config["executor"] == "local": + with add_to_path(get_unmounted_path(cluster_config, data_dir)): + dataset_module = importlib.import_module(dataset) + else: + dataset_module = _get_dataset_module_from_cluster(cluster_config, f"{data_dir}/{dataset_as_path}/__init__.py") + return dataset_module, data_dir diff --git a/nemo_skills/pipeline/utils/eval.py b/nemo_skills/pipeline/utils/eval.py index e0ecedbe56..ff1ab345fe 100644 --- a/nemo_skills/pipeline/utils/eval.py +++ b/nemo_skills/pipeline/utils/eval.py @@ -21,9 +21,10 @@ from typing import Dict import nemo_skills.pipeline.utils as pipeline_utils -from nemo_skills.dataset.utils import get_dataset_module, get_dataset_name, import_from_path +from nemo_skills.dataset.utils import get_dataset_name, import_from_path from nemo_skills.inference import GENERATION_MODULE_MAP from nemo_skills.inference.generate import GenerationTask +from nemo_skills.pipeline.dataset import get_dataset_module from nemo_skills.utils import compute_chunk_ids, get_logger_name LOG = logging.getLogger(get_logger_name(__file__)) diff --git a/pyproject.toml b/pyproject.toml index d86f184bc6..2e5a5afb14 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,8 +54,15 @@ exclude = ["tests", "tests.*"] [tool.setuptools.dynamic] version = { attr = "nemo_skills.version.__version__" } -dependencies = {file = ["requirements/main.txt"]} -optional-dependencies = {dev = { file = 
["requirements/common-tests.txt", "requirements/common-dev.txt"] } } +dependencies = {file = ["core/requirements.txt", "requirements/pipeline.txt"]} + +[tool.setuptools.dynamic.optional-dependencies] +# Core: lightweight runtime only (inference, evaluation, tool calling) +core = { file = ["core/requirements.txt"] } +# Pipeline: orchestration, cluster management, CLI (adds nemo_run, typer, wandb) +pipeline = { file = ["requirements/pipeline.txt"] } +# Dev: development and testing tools +dev = { file = ["requirements/common-tests.txt", "requirements/common-dev.txt"] } [tool.pytest.ini_options] markers = [ diff --git a/requirements/core.txt b/requirements/core.txt new file mode 120000 index 0000000000..f8cfc00bc7 --- /dev/null +++ b/requirements/core.txt @@ -0,0 +1 @@ +../core/requirements.txt \ No newline at end of file diff --git a/requirements/main.txt b/requirements/main.txt deleted file mode 100644 index 3f417ae8f0..0000000000 --- a/requirements/main.txt +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -bs4 -click < 8.2.0 # https://github.com/ai-dynamo/dynamo/issues/1039 -compute-eval @ git+https://github.com/NVIDIA/compute-eval.git@2d14770 -datasets -editdistance -# ddgs # Needed for BFCLv4 - currently cannot be installed directly due to lepton obsolete httpx version in use -evalplus @ git+https://github.com/evalplus/evalplus@c91370f -faiss-cpu # Needed for BFCLv4 -fire - -# needed local code execution server for persistent sessions -flask -func-timeout # Needed for BIRD benchmark -gradio -httpx -huggingface_hub -hydra-core -ipython -iso639-lang -langcodes -language-data -litellm[caching] -math-verify[antlr4_9_3] -mcp -nemo-evaluator-launcher<0.1.47 -nemo_run @ git+https://github.com/NVIDIA-NeMo/Run -numpy -openai -openpyxl>=3.1.0 -pandas>=2.0.0 -pyxlsb>=1.0.10 -pyyaml -rank_bm25 # Needed for BFCLv4 -requests -sacrebleu -scikit-learn -sentence_transformers # Needed for BFCLv4 -serpapi # Needed for BFCLv4 -sympy -tqdm -transformers -typer >= 0.13 -wandb diff --git a/requirements/pipeline.txt b/requirements/pipeline.txt new file mode 100644 index 0000000000..8cc278c7ed --- /dev/null +++ b/requirements/pipeline.txt @@ -0,0 +1,7 @@ +# Pipeline/orchestration dependencies (CLI, cluster management, experiment tracking). +# These are additional to core/requirements.txt. + +click < 8.2.0 # https://github.com/ai-dynamo/dynamo/issues/1039 +nemo-evaluator-launcher<0.1.47 +nemo_run @ git+https://github.com/NVIDIA-NeMo/Run +typer >= 0.13 diff --git a/tests/test_dependency_isolation.py b/tests/test_dependency_isolation.py new file mode 100644 index 0000000000..baabcc9df9 --- /dev/null +++ b/tests/test_dependency_isolation.py @@ -0,0 +1,88 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests that core modules can be imported without pipeline dependencies. + +The core/pipeline boundary rule is: Pipeline can import Core, but Core cannot +import Pipeline. Concretely, this means that core modules must not have +top-level imports of nemo_run or anything under nemo_skills.pipeline. + +These tests verify this by importing core modules in a subprocess where +nemo_run is blocked via a sys.modules override. +""" + +import pathlib +import subprocess +import sys + +import pytest + + +def _discover_core_modules(): + """Find all importable nemo_skills subpackages except pipeline.""" + import nemo_skills + + root = pathlib.Path(nemo_skills.__file__).parent + modules = [] + for init in sorted(root.rglob("__init__.py")): + rel = init.parent.relative_to(root) + parts = rel.parts + if not parts: # root __init__.py + continue + if parts[0] == "pipeline" or "__pycache__" in parts: + continue + modules.append("nemo_skills." 
+ ".".join(parts)) + return modules + + +CORE_MODULES = _discover_core_modules() + + +@pytest.mark.parametrize("module_name", CORE_MODULES) +def test_core_module_imports_without_nemo_run(module_name): + """Each core module must import successfully when nemo_run is unavailable.""" + script = ( + "import sys, importlib; " + # Block nemo_run from being importable + "sys.modules['nemo_run'] = None; " + "sys.modules['nemo_skills.pipeline'] = None; " + "sys.modules['nemo_skills.pipeline.utils'] = None; " + f"importlib.import_module('{module_name}'); " + "print('OK')" + ) + result = subprocess.run( + [sys.executable, "-c", script], + capture_output=True, + text=True, + timeout=30, + ) + assert result.returncode == 0, ( + f"Importing {module_name} failed when nemo_run is unavailable.\n" + f"stdout: {result.stdout}\n" + f"stderr: {result.stderr}" + ) + + +def test_pipeline_can_import_core(): + """Pipeline modules should be able to import core modules.""" + script = "from nemo_skills.pipeline.dataset import get_dataset_module; print('OK')" + result = subprocess.run( + [sys.executable, "-c", script], + capture_output=True, + text=True, + timeout=30, + ) + assert result.returncode == 0, ( + f"Pipeline failed to import core modules.\nstdout: {result.stdout}\nstderr: {result.stderr}" + )