diff --git a/docker/Dockerfile b/docker/Dockerfile index 8b2560a100..539b927e2e 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -95,6 +95,13 @@ ARG MAX_JOBS ARG NVTE_BUILD_THREADS_PER_JOB # Only use for custom vllm installs. Learn more at https://github.com/NVIDIA-NeMo/RL/blob/main/docs/guides/use-custom-vllm.md ARG BUILD_CUSTOM_VLLM +ARG BUILD_CUSTOM_VLLM_URL +ARG BUILD_CUSTOM_VLLM_REF +ARG BUILD_CUSTOM_VLLM_PRECOMPILED_WHEEL_LOCATION +# Only use for custom flashinfer installs. +ARG BUILD_CUSTOM_FLASHINFER +ARG BUILD_CUSTOM_FLASHINFER_URL +ARG BUILD_CUSTOM_FLASHINFER_REF # Skip building vLLM or SGLang dependencies (set to any non-empty value to skip) ARG SKIP_VLLM_BUILD ARG SKIP_SGLANG_BUILD @@ -110,15 +117,19 @@ COPY --from=nemo-rl pyproject.toml uv.lock ./ # Copy in the top level __init__.py/package_info.py since build-custom-vllm.sh needs the nemo_rl package to exist. COPY --from=nemo-rl nemo_rl/__init__.py nemo_rl/package_info.py ./nemo_rl/ COPY --from=nemo-rl tools/build-custom-vllm.sh ./tools/build-custom-vllm.sh +COPY --from=nemo-rl tools/build-custom-flashinfer.sh ./tools/build-custom-flashinfer.sh COPY --from=nemo-rl --link research/ ./research/ COPY --from=nemo-rl --link 3rdparty/ ./3rdparty/ RUN --mount=type=ssh <<"EOF" bash -exu uv venv --seed if [[ -n "${BUILD_CUSTOM_VLLM:-}" ]]; then - bash tools/build-custom-vllm.sh + bash tools/build-custom-vllm.sh ${BUILD_CUSTOM_VLLM_URL} ${BUILD_CUSTOM_VLLM_REF} ${BUILD_CUSTOM_VLLM_PRECOMPILED_WHEEL_LOCATION} source 3rdparty/vllm/nemo-rl.env fi +if [[ -n "${BUILD_CUSTOM_FLASHINFER:-}" ]]; then + bash tools/build-custom-flashinfer.sh ${BUILD_CUSTOM_FLASHINFER_URL} ${BUILD_CUSTOM_FLASHINFER_REF} +fi # uv sync has a more reliable resolver than simple uv pip install which can fail # Sync each training + inference backend one at a time (since they may conflict) diff --git a/docs/guides/use-custom-vllm.md b/docs/guides/use-custom-vllm.md index 8ad0b07688..4c79a54bf8 100644 --- a/docs/guides/use-custom-vllm.md 
+++ b/docs/guides/use-custom-vllm.md @@ -7,8 +7,8 @@ This guide explains how to use your own version of vLLM while leveraging a pre-c Clone your vLLM fork and build it using the provided script. For example: ```sh -# Usage: bash tools/build-custom-vllm.sh -bash tools/build-custom-vllm.sh https://github.com/terrykong/vllm.git terryk/demo-custom-vllm d8ee5a2ca4c73f2ce5fdc386ce5b4ef3b6e6ae70 +# Usage: bash tools/build-custom-vllm.sh +bash tools/build-custom-vllm.sh https://github.com/terrykong/vllm.git terryk/demo-custom-vllm https://wheels.vllm.ai/862f2ef893d9751db0a92bd2d4ae0e3d9677872f/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl # [INFO] pyproject.toml updated. NeMo RL is now configured to use the local vLLM at 3rdparty/vllm. # [INFO] Verify this new vllm version by running: @@ -47,7 +47,7 @@ To ensure the custom vLLM install is setup properly in NeMo RL applications, alw ```sh # Ensures vLLM uses the precompiled wheel and avoids recompiling C++ sources -export VLLM_PRECOMPILED_WHEEL_LOCATION=https://wheels.vllm.ai/d8ee5a2ca4c73f2ce5fdc386ce5b4ef3b6e6ae70/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl +export VLLM_PRECOMPILED_WHEEL_LOCATION=https://wheels.vllm.ai/862f2ef893d9751db0a92bd2d4ae0e3d9677872f/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl # Ensures worker venvs are rebuilt to use the custom vLLM. Otherwise it may use the cached version in cached venvs export NRL_FORCE_REBUILD_VENVS=true # This isn't necessary if you only do `uv run foobar.py`, but may be needed if you switching between optional extras `uv run --extra vllm foobar.py`. If you are unsure if you need this, it's safer to include it. diff --git a/tools/build-custom-flashinfer.sh b/tools/build-custom-flashinfer.sh new file mode 100644 index 0000000000..f310355ba9 --- /dev/null +++ b/tools/build-custom-flashinfer.sh @@ -0,0 +1,98 @@ +#!/bin/bash +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -eou pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(realpath "$SCRIPT_DIR/..")" + + +# Parse command line arguments +GIT_URL=${1:-https://github.com/flashinfer-ai/flashinfer} +GIT_REF=${2:-main} + +BUILD_DIR=$(realpath "$SCRIPT_DIR/../3rdparty/flashinfer") +if [[ -e "$BUILD_DIR" ]]; then + echo "[ERROR] $BUILD_DIR already exists. Please remove or move it before running this script." + exit 1 +fi + +echo "Building FlashInfer from:" +echo " FlashInfer Git URL: $GIT_URL" +echo " FlashInfer Git ref: $GIT_REF" + +# Clone the repository +echo "Cloning repository..." +# When running inside Docker with --mount=type=ssh, the known_hosts file is empty. +# Skip host key verification for internal builds (only applies to SSH URLs). +GIT_SSH_COMMAND="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null" git clone --recursive "$GIT_URL" "$BUILD_DIR" +cd "$BUILD_DIR" +git checkout "$GIT_REF" +git submodule update --init --recursive + +PYPROJECT_TOML="$REPO_ROOT/pyproject.toml" +if [[ ! -f "$PYPROJECT_TOML" ]]; then + echo "[ERROR] pyproject.toml not found at $PYPROJECT_TOML. This script must be run from the repo root and pyproject.toml must exist." + exit 1 +fi + +cd "$REPO_ROOT" + +if [[ -n "${UV_PROJECT_ENVIRONMENT:-}" ]]; then + # We optionally set this if the project environment is outside of the project directory.
+ # If we do not set this then uv pip install commands will fail + export VIRTUAL_ENV=$UV_PROJECT_ENVIRONMENT +fi +# Use tomlkit via uv to idempotently update pyproject.toml +uv run --no-project --with tomlkit python - <<'PY' +from pathlib import Path +from tomlkit import parse, dumps, inline_table + +pyproject_path = Path("pyproject.toml") +text = pyproject_path.read_text() +doc = parse(text) + +# 1) Add [tool.uv.sources].flashinfer-python = { path = "3rdparty/flashinfer", editable = true } +tool = doc.setdefault("tool", {}) +uv = tool.setdefault("uv", {}) +sources = uv.setdefault("sources", {}) +desired = inline_table() +desired.update({"path": "3rdparty/flashinfer", "editable": True}) +sources["flashinfer-python"] = desired + +# 2) Add flashinfer-python to [project.optional-dependencies].vllm +project = doc.get("project") +if project is None: + raise SystemExit("[ERROR] Missing [project] in pyproject.toml") + +opt = project.setdefault("optional-dependencies", {}) +vllm_list = opt.get("vllm") +if not vllm_list: + vllm_list = [] +if "flashinfer-python" not in vllm_list: + vllm_list.append("flashinfer-python") +opt["vllm"] = vllm_list + +pyproject_path.write_text(dumps(doc)) +print("[INFO] Updated pyproject.toml for local FlashInfer.") +PY + +# Ensure build deps and re-lock +uv pip install setuptools_scm +uv lock + +cat <