NVIDIA-NeMo · guyueh1 · Feb 5, 2026 · Feb 5, 2026 · Feb 9, 2026 · Feb 10, 2026
@@ -95,6 +95,13 @@ ARG MAX_JOBS
 ARG NVTE_BUILD_THREADS_PER_JOB
 # Only use for custom vllm installs. Learn more at https://github.com/NVIDIA-NeMo/RL/blob/main/docs/guides/use-custom-vllm.md
 ARG BUILD_CUSTOM_VLLM
+ARG BUILD_CUSTOM_VLLM_URL
+ARG BUILD_CUSTOM_VLLM_REF
+ARG BUILD_CUSTOM_VLLM_PRECOMPILED_WHEEL_LOCATION
+# Only use for custom flashinfer installs.
+ARG BUILD_CUSTOM_FLASHINFER
+ARG BUILD_CUSTOM_FLASHINFER_URL
+ARG BUILD_CUSTOM_FLASHINFER_REF
 # Skip building vLLM or SGLang dependencies (set to any non-empty value to skip)
 ARG SKIP_VLLM_BUILD
 ARG SKIP_SGLANG_BUILD
@@ -110,15 +117,22 @@ COPY --from=nemo-rl pyproject.toml uv.lock ./
 # Copy in the top level __init__.py/package_info.py since build-custom-vllm.sh needs the nemo_rl package to exist.
 COPY --from=nemo-rl nemo_rl/__init__.py nemo_rl/package_info.py ./nemo_rl/
 COPY --from=nemo-rl tools/build-custom-vllm.sh ./tools/build-custom-vllm.sh
+COPY --from=nemo-rl tools/build-custom-flashinfer.sh ./tools/build-custom-flashinfer.sh
 COPY --from=nemo-rl --link research/ ./research/
 COPY --from=nemo-rl --link 3rdparty/ ./3rdparty/
 
 RUN --mount=type=ssh <<"EOF" bash -exu
 uv venv --seed
+# Optional build args are passed quoted with an empty default: this shell runs with -u, so a
+# missing arg would abort the build, and an empty arg must keep its position so the arguments
+# after it do not shift. The build scripts fall back to their own defaults for empty arguments.
 if [[ -n "${BUILD_CUSTOM_VLLM:-}" ]]; then
-    bash tools/build-custom-vllm.sh
+    bash tools/build-custom-vllm.sh "${BUILD_CUSTOM_VLLM_URL:-}" "${BUILD_CUSTOM_VLLM_REF:-}" "${BUILD_CUSTOM_VLLM_PRECOMPILED_WHEEL_LOCATION:-}"
     source 3rdparty/vllm/nemo-rl.env
 fi
+if [[ -n "${BUILD_CUSTOM_FLASHINFER:-}" ]]; then
+    bash tools/build-custom-flashinfer.sh "${BUILD_CUSTOM_FLASHINFER_URL:-}" "${BUILD_CUSTOM_FLASHINFER_REF:-}"
+fi
 # uv sync has a more reliable resolver than simple uv pip install which can fail
 
 # Sync each training + inference backend one at a time (since they may conflict)

@@ -7,8 +7,8 @@ This guide explains how to use your own version of vLLM while leveraging a pre-c
 Clone your vLLM fork and build it using the provided script. For example:
 
 ```sh
-# Usage: bash tools/build-custom-vllm.sh <GIT_URL> <GIT_REF> <VLLM_WHEEL_COMMIT>
-bash tools/build-custom-vllm.sh https://github.com/terrykong/vllm.git terryk/demo-custom-vllm d8ee5a2ca4c73f2ce5fdc386ce5b4ef3b6e6ae70
+# Usage: bash tools/build-custom-vllm.sh <GIT_URL> <GIT_REF> <VLLM_PRECOMPILED_WHEEL_LOCATION>
+bash tools/build-custom-vllm.sh https://github.com/terrykong/vllm.git terryk/demo-custom-vllm https://wheels.vllm.ai/862f2ef893d9751db0a92bd2d4ae0e3d9677872f/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl
 
 # [INFO] pyproject.toml updated. NeMo RL is now configured to use the local vLLM at 3rdparty/vllm.
 # [INFO] Verify this new vllm version by running:
@@ -47,7 +47,7 @@ To ensure the custom vLLM install is setup properly in NeMo RL applications, alw
 
 ```sh
 # Ensures vLLM uses the precompiled wheel and avoids recompiling C++ sources
-export VLLM_PRECOMPILED_WHEEL_LOCATION=https://wheels.vllm.ai/d8ee5a2ca4c73f2ce5fdc386ce5b4ef3b6e6ae70/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl
+export VLLM_PRECOMPILED_WHEEL_LOCATION=https://wheels.vllm.ai/862f2ef893d9751db0a92bd2d4ae0e3d9677872f/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl
 # Ensures worker venvs are rebuilt to use the custom vLLM. Otherwise it may use the cached version in cached venvs
 export NRL_FORCE_REBUILD_VENVS=true
 # This isn't necessary if you only do `uv run foobar.py`, but may be needed if you are switching between optional extras with `uv run --extra vllm foobar.py`. If you are unsure whether you need this, it's safer to include it.

@@ -0,0 +1,118 @@
+#!/bin/bash
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Clone FlashInfer into 3rdparty/flashinfer and configure NeMo RL's pyproject.toml
+# to use that local checkout (editable uv source + entry in the "vllm" extra),
+# then re-lock with uv.
+#
+# Usage: bash tools/build-custom-flashinfer.sh [GIT_URL] [GIT_REF]
+#   GIT_URL  Repository to clone (default: https://github.com/flashinfer-ai/flashinfer)
+#   GIT_REF  Ref to check out after cloning (default: main)
+# Empty or omitted arguments fall back to the defaults.
+#
+# Environment:
+#   UV_PROJECT_ENVIRONMENT  Optional. When non-empty it is exported as VIRTUAL_ENV.
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(realpath "$SCRIPT_DIR/..")"
+
+# Parse command line arguments
+GIT_URL=${1:-https://github.com/flashinfer-ai/flashinfer}
+GIT_REF=${2:-main}
+
+# Derived from the already-resolved REPO_ROOT so this works even if 3rdparty/ does not exist yet.
+BUILD_DIR="$REPO_ROOT/3rdparty/flashinfer"
+if [[ -e "$BUILD_DIR" ]]; then
+  echo "[ERROR] $BUILD_DIR already exists. Please remove or move it before running this script." >&2
+  exit 1
+fi
+
+# Validate pyproject.toml before cloning so that a failure here does not leave a
+# checkout behind that would make the next run abort with "already exists".
+PYPROJECT_TOML="$REPO_ROOT/pyproject.toml"
+if [[ ! -f "$PYPROJECT_TOML" ]]; then
+  echo "[ERROR] pyproject.toml not found at $PYPROJECT_TOML. This script must be run from the repo root and pyproject.toml must exist." >&2
+  exit 1
+fi
+
+echo "Building FlashInfer from:"
+echo "  FlashInfer Git URL: $GIT_URL"
+echo "  FlashInfer Git ref: $GIT_REF"
+
+# Clone the repository
+echo "Cloning repository..."
+# When running inside Docker with --mount=type=ssh, the known_hosts file is empty.
+# Skip host key verification for internal builds (only applies to SSH URLs).
+GIT_SSH_NO_HOST_CHECK="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
+GIT_SSH_COMMAND="$GIT_SSH_NO_HOST_CHECK" git clone --recursive "$GIT_URL" "$BUILD_DIR"
+cd "$BUILD_DIR"
+git checkout "$GIT_REF"
+# The checked-out ref may pin different (or additional) submodules than the branch that was
+# cloned, so initialize and update them recursively, with the same SSH options as the clone.
+GIT_SSH_COMMAND="$GIT_SSH_NO_HOST_CHECK" git submodule update --init --recursive
+
+cd "$REPO_ROOT"
+
+# The ${...:-} form keeps `set -u` from aborting when the variable is not set at all.
+if [[ -n "${UV_PROJECT_ENVIRONMENT:-}" ]]; then
+    # We optionally set this if the project environment is outside of the project directory.
+    # If we do not set this then uv pip install commands will fail
+    export VIRTUAL_ENV=$UV_PROJECT_ENVIRONMENT
+fi
+# Use tomlkit via uv to idempotently update pyproject.toml
+uv run --no-project --with tomlkit python - <<'PY'
+from pathlib import Path
+from tomlkit import parse, dumps, inline_table
+
+pyproject_path = Path("pyproject.toml")
+text = pyproject_path.read_text()
+doc = parse(text)
+
+# 1) Add [tool.uv.sources].flashinfer-python = { path = "3rdparty/flashinfer", editable = true }
+tool = doc.setdefault("tool", {})
+uv = tool.setdefault("uv", {})
+sources = uv.setdefault("sources", {})
+desired = inline_table()
+desired.update({"path": "3rdparty/flashinfer", "editable": True})
+sources["flashinfer-python"] = desired
+
+# 2) Add flashinfer-python to [project.optional-dependencies].vllm
+project = doc.get("project")
+if project is None:
+    raise SystemExit("[ERROR] Missing [project] in pyproject.toml")
+
+# The table or the "vllm" extra may be absent; create them instead of failing on None / KeyError.
+opt = project.setdefault("optional-dependencies", {})
+vllm_list = opt.get("vllm")
+if not vllm_list:
+    vllm_list = []
+if "flashinfer-python" not in vllm_list:
+    vllm_list.append("flashinfer-python")
+opt["vllm"] = vllm_list
+
+# Write last so that every change made above is persisted.
+pyproject_path.write_text(dumps(doc))
+print("[INFO] Updated pyproject.toml for local FlashInfer.")
+PY
+
+# Ensure build deps and re-lock
+uv pip install setuptools_scm
+uv lock
+
+cat <<EOF
+[INFO] pyproject.toml updated. NeMo RL is now configured to use the local FlashInfer at 3rdparty/flashinfer.
+EOF
@@ -24,8 +24,8 @@ GIT_URL=${1:-https://github.com/vllm-project/vllm.git}
 GIT_REF=${2:-cc99baf14dacc2497d0c5ed84e076ef2c37f6a4d}
 # NOTE: VLLM_USE_PRECOMPILED=1 didn't always seem to work since the wheels were sometimes built against an incompatible torch/cuda combo.
 # This commit was chosen as one close to the v0.10 release: git merge-base --fork-point origin/main tags/v0.10.0
-VLLM_WHEEL_COMMIT=${3:-862f2ef893d9751db0a92bd2d4ae0e3d9677872f}  # use full commit hash from the main branch
-export VLLM_PRECOMPILED_WHEEL_LOCATION="https://wheels.vllm.ai/${VLLM_WHEEL_COMMIT}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
+VLLM_PRECOMPILED_WHEEL_LOCATION=${3:-https://wheels.vllm.ai/862f2ef893d9751db0a92bd2d4ae0e3d9677872f/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl}
+export VLLM_PRECOMPILED_WHEEL_LOCATION
 
 BUILD_DIR=$(realpath "$SCRIPT_DIR/../3rdparty/vllm")
 if [[ -e "$BUILD_DIR" ]]; then
@@ -36,7 +36,6 @@ fi
 echo "Building vLLM from:"
 echo "  Vllm Git URL: $GIT_URL"
 echo "  Vllm Git ref: $GIT_REF"
-echo "  Vllm Wheel commit: $VLLM_WHEEL_COMMIT"
 echo "  Vllm Wheel location: $VLLM_PRECOMPILED_WHEEL_LOCATION"
 
 # Clone the repository