Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,13 @@ ARG MAX_JOBS
ARG NVTE_BUILD_THREADS_PER_JOB
# Only use for custom vllm installs. Learn more at https://github.com/NVIDIA-NeMo/RL/blob/main/docs/guides/use-custom-vllm.md
ARG BUILD_CUSTOM_VLLM
ARG BUILD_CUSTOM_VLLM_URL
ARG BUILD_CUSTOM_VLLM_REF
ARG BUILD_CUSTOM_VLLM_PRECOMPILED_WHEEL_LOCATION
# Only use for custom flashinfer installs.
ARG BUILD_CUSTOM_FLASHINFER
ARG BUILD_CUSTOM_FLASHINFER_URL
ARG BUILD_CUSTOM_FLASHINFER_REF
# Skip building vLLM or SGLang dependencies (set to any non-empty value to skip)
ARG SKIP_VLLM_BUILD
ARG SKIP_SGLANG_BUILD
Expand All @@ -110,15 +117,19 @@ COPY --from=nemo-rl pyproject.toml uv.lock ./
# Copy in the top level __init__.py/package_info.py since build-custom-vllm.sh needs the nemo_rl package to exist.
COPY --from=nemo-rl nemo_rl/__init__.py nemo_rl/package_info.py ./nemo_rl/
COPY --from=nemo-rl tools/build-custom-vllm.sh ./tools/build-custom-vllm.sh
COPY --from=nemo-rl tools/build-custom-flashinfer.sh ./tools/build-custom-flashinfer.sh
COPY --from=nemo-rl --link research/ ./research/
COPY --from=nemo-rl --link 3rdparty/ ./3rdparty/

RUN --mount=type=ssh <<"EOF" bash -exu
uv venv --seed
if [[ -n "${BUILD_CUSTOM_VLLM:-}" ]]; then
bash tools/build-custom-vllm.sh
bash tools/build-custom-vllm.sh ${BUILD_CUSTOM_VLLM_URL} ${BUILD_CUSTOM_VLLM_REF} ${BUILD_CUSTOM_VLLM_PRECOMPILED_WHEEL_LOCATION}
source 3rdparty/vllm/nemo-rl.env
fi
if [[ -n "${BUILD_CUSTOM_FLASHINFER:-}" ]]; then
bash tools/build-custom-flashinfer.sh ${BUILD_CUSTOM_FLASHINFER_URL} ${BUILD_CUSTOM_FLASHINFER_REF}
fi
# uv sync has a more reliable resolver than simple uv pip install which can fail

# Sync each training + inference backend one at a time (since they may conflict)
Expand Down
6 changes: 3 additions & 3 deletions docs/guides/use-custom-vllm.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ This guide explains how to use your own version of vLLM while leveraging a pre-c
Clone your vLLM fork and build it using the provided script. For example:

```sh
# Usage: bash tools/build-custom-vllm.sh <GIT_URL> <GIT_REF> <VLLM_WHEEL_COMMIT>
bash tools/build-custom-vllm.sh https://github.com/terrykong/vllm.git terryk/demo-custom-vllm d8ee5a2ca4c73f2ce5fdc386ce5b4ef3b6e6ae70
# Usage: bash tools/build-custom-vllm.sh <GIT_URL> <GIT_REF> <VLLM_PRECOMPILED_WHEEL_LOCATION>
bash tools/build-custom-vllm.sh https://github.com/terrykong/vllm.git terryk/demo-custom-vllm https://wheels.vllm.ai/862f2ef893d9751db0a92bd2d4ae0e3d9677872f/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl

# [INFO] pyproject.toml updated. NeMo RL is now configured to use the local vLLM at 3rdparty/vllm.
# [INFO] Verify this new vllm version by running:
Expand Down Expand Up @@ -47,7 +47,7 @@ To ensure the custom vLLM install is setup properly in NeMo RL applications, alw

```sh
# Ensures vLLM uses the precompiled wheel and avoids recompiling C++ sources
export VLLM_PRECOMPILED_WHEEL_LOCATION=https://wheels.vllm.ai/d8ee5a2ca4c73f2ce5fdc386ce5b4ef3b6e6ae70/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl
export VLLM_PRECOMPILED_WHEEL_LOCATION=https://wheels.vllm.ai/862f2ef893d9751db0a92bd2d4ae0e3d9677872f/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl
# Ensures worker venvs are rebuilt to use the custom vLLM. Otherwise it may use the cached version in cached venvs
export NRL_FORCE_REBUILD_VENVS=true
# This isn't necessary if you only run `uv run foobar.py`, but it may be needed if you are switching between optional extras (e.g. `uv run --extra vllm foobar.py`). If you are unsure whether you need this, it's safer to include it.
Expand Down
98 changes: 98 additions & 0 deletions tools/build-custom-flashinfer.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#!/bin/bash
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Strict mode: abort on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(realpath "$SCRIPT_DIR/..")"

# Positional arguments (both optional; an empty string also selects the default):
#   $1 - FlashInfer git URL
#   $2 - git ref (branch, tag or commit) to check out
GIT_URL=${1:-https://github.com/flashinfer-ai/flashinfer}
GIT_REF=${2:-main}

BUILD_DIR=$(realpath "$SCRIPT_DIR/../3rdparty/flashinfer")
if [[ -e "$BUILD_DIR" ]]; then
  echo "[ERROR] $BUILD_DIR already exists. Please remove or move it before running this script." >&2
  exit 1
fi

# Validate pyproject.toml before cloning so a failure here does not leave a
# freshly cloned checkout behind that blocks the next run.
PYPROJECT_TOML="$REPO_ROOT/pyproject.toml"
if [[ ! -f "$PYPROJECT_TOML" ]]; then
  echo "[ERROR] pyproject.toml not found at $PYPROJECT_TOML. This script must be run from the repo root and pyproject.toml must exist." >&2
  exit 1
fi

echo "Building FlashInfer from:"
echo " FlashInfer Git URL: $GIT_URL"
echo " FlashInfer Git ref: $GIT_REF"

# Clone the repository
echo "Cloning repository..."
# When running inside Docker with --mount=type=ssh, the known_hosts file is empty.
# Skip host key verification for internal builds (only applies to SSH URLs).
GIT_SSH_COMMAND="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null" git clone --recursive "$GIT_URL" "$BUILD_DIR"
cd "$BUILD_DIR"
git checkout "$GIT_REF"
# The requested ref may declare submodules that the default branch does not,
# so initialise (not just update) them, including nested ones.
git submodule update --init --recursive

cd "$REPO_ROOT"

# UV_PROJECT_ENVIRONMENT is optional; expand it with a default so `set -u`
# does not abort the script when it is not exported.
if [[ -n "${UV_PROJECT_ENVIRONMENT:-}" ]]; then
  # We optionally set this if the project environment is outside of the project directory.
  # If we do not set this then uv pip install commands will fail
  export VIRTUAL_ENV=$UV_PROJECT_ENVIRONMENT
fi
# Use tomlkit via uv to idempotently update pyproject.toml
uv run --no-project --with tomlkit python - <<'PY'
# Rewrites ./pyproject.toml in place so that uv resolves `flashinfer-python`
# from the local checkout at 3rdparty/flashinfer and the `vllm` extra depends on it.
from pathlib import Path
from tomlkit import parse, dumps, inline_table

pyproject_path = Path("pyproject.toml")
text = pyproject_path.read_text()
doc = parse(text)

# 1) Add [tool.uv.sources].flashinfer-python = { path = "3rdparty/flashinfer", editable = true }
tool = doc.setdefault("tool", {})
uv = tool.setdefault("uv", {})
sources = uv.setdefault("sources", {})
desired = inline_table()
desired.update({"path": "3rdparty/flashinfer", "editable": True})
sources["flashinfer-python"] = desired

# 2) Add flashinfer-python to [project.optional-dependencies].vllm
project = doc.get("project")
if project is None:
    raise SystemExit("[ERROR] Missing [project] in pyproject.toml")

# Create the optional-dependencies table if it is absent, and look the extra up
# with .get() so a missing `vllm` entry falls through to the empty-list default
# instead of raising TypeError/KeyError.
opt = project.setdefault("optional-dependencies", {})
vllm_list = opt.get("vllm")
if not vllm_list:
    vllm_list = []
# Exact-string membership test: re-running the script does not append a duplicate.
if "flashinfer-python" not in vllm_list:
    vllm_list.append("flashinfer-python")
opt["vllm"] = vllm_list

pyproject_path.write_text(dumps(doc))
print("[INFO] Updated pyproject.toml for local FlashInfer.")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought flashinfer needed to be a no-build-isolation package when built from source. You probably need the equivalent of this section from the vllm script:

# 4) Ensure [tool.uv].no-build-isolation-package includes "vllm"
nbip = uv.setdefault("no-build-isolation-package", [])
nbip_strs = [str(x) for x in nbip]
if "vllm" not in nbip_strs:
nbip.append("vllm")

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I think it's not needed: flashinfer doesn't compile anything at installation time; it is compiled JIT.
In the super training branch we just add flashinfer-python to the vllm venv, but not to no-build-isolation.

PY

# Install setuptools_scm into the environment, then regenerate the lockfile
# against the pyproject.toml that was just rewritten.
uv pip install setuptools_scm
uv lock

# Final status line for the caller.
printf '%s\n' "[INFO] pyproject.toml updated. NeMo RL is now configured to use the local FlashInfer at 3rdparty/flashinfer."
5 changes: 2 additions & 3 deletions tools/build-custom-vllm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ GIT_URL=${1:-https://github.com/vllm-project/vllm.git}
GIT_REF=${2:-cc99baf14dacc2497d0c5ed84e076ef2c37f6a4d}
# NOTE: VLLM_USE_PRECOMPILED=1 didn't always seem to work since the wheels were sometimes built against an incompatible torch/cuda combo.
# This commit was chosen as one close to the v0.10 release: git merge-base --fork-point origin/main tags/v0.10.0
VLLM_WHEEL_COMMIT=${3:-862f2ef893d9751db0a92bd2d4ae0e3d9677872f} # use full commit hash from the main branch
export VLLM_PRECOMPILED_WHEEL_LOCATION="https://wheels.vllm.ai/${VLLM_WHEEL_COMMIT}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
VLLM_PRECOMPILED_WHEEL_LOCATION=${3:-https://wheels.vllm.ai/862f2ef893d9751db0a92bd2d4ae0e3d9677872f/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl}
export VLLM_PRECOMPILED_WHEEL_LOCATION

BUILD_DIR=$(realpath "$SCRIPT_DIR/../3rdparty/vllm")
if [[ -e "$BUILD_DIR" ]]; then
Expand All @@ -36,7 +36,6 @@ fi
echo "Building vLLM from:"
echo " Vllm Git URL: $GIT_URL"
echo " Vllm Git ref: $GIT_REF"
echo " Vllm Wheel commit: $VLLM_WHEEL_COMMIT"
echo " Vllm Wheel location: $VLLM_PRECOMPILED_WHEEL_LOCATION"

# Clone the repository
Expand Down
Loading