Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
512b71a
feat: bumping Dockerfile to CUDA13 image
saturley-hall Oct 9, 2025
9ed18a6
fix: base installation of vllm missing EP kernels
saturley-hall Oct 10, 2025
660eac3
Update the trtllm version
tanmayv25 Oct 14, 2025
b6b3f7a
Update for newer TRTLLM
tanmayv25 Oct 16, 2025
6ebbfa1
Improve sampling params
tanmayv25 Oct 16, 2025
a33a483
Fix the build
tanmayv25 Oct 16, 2025
7d22f7c
Fixes in trtllm and vllm
dmitry-tokarev-nv Oct 17, 2025
5906f79
torch 2.9.0+cu130 and ucx cu 13
dmitry-tokarev-nv Oct 17, 2025
fa86ca2
build.sh - trtllm==1.2.0rc0
dmitry-tokarev-nv Oct 21, 2025
fa1ee73
vllm v0.11.1rc1
dmitry-tokarev-nv Oct 21, 2025
d77c548
Updated UCX and NIXL. Added uv pip list for debugging
dmitry-tokarev-nv Oct 21, 2025
d67e7de
TENSORRTLLM_INDEX_URL="https://download.pytorch.org/whl/cu130"
dmitry-tokarev-nv Oct 22, 2025
7b4e4b3
use --extra-index-url https://download.pytorch.org/whl/cu130 in requi…
dmitry-tokarev-nv Oct 22, 2025
cad2721
vllm 0.11.1rc2, flashinfer 0.4.1
dmitry-tokarev-nv Oct 24, 2025
b66a61e
debug lines
dmitry-tokarev-nv Oct 31, 2025
7f84b6d
Fix debug commands
dmitry-tokarev-nv Nov 3, 2025
393de95
install dynamo on system level
dmitry-tokarev-nv Nov 4, 2025
b427f5d
Fixes in trtllm container
dmitry-tokarev-nv Nov 4, 2025
25c19f0
Fixed wheel file names to support diff versions and archs
dmitry-tokarev-nv Nov 5, 2025
f98796a
comments
dmitry-tokarev-nv Nov 5, 2025
4f5cb2a
Merge branch 'release/0.7.0' into dtokarev/cuda13
nv-tusharma Nov 21, 2025
52f81ae
Merge branch 'release/0.7.0' into dtokarev/cuda13
nv-tusharma Nov 21, 2025
e57a3c7
resolve merge conflicts
nv-tusharma Nov 22, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions components/src/dynamo/trtllm/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,6 @@ async def init(runtime: DistributedRuntime, config: Config):
"pipeline_parallel_size": config.pipeline_parallel_size,
"moe_expert_parallel_size": config.expert_parallel_size,
"backend": "pytorch",
"skip_tokenizer_init": True,
"build_config": build_config,
"kv_cache_config": kv_cache_config,
"gpus_per_node": gpus_per_node,
Expand Down Expand Up @@ -240,8 +239,6 @@ async def init(runtime: DistributedRuntime, config: Config):
# Populate default sampling params from the model
tokenizer = tokenizer_factory(arg_map["model"])
default_sampling_params = SamplingParams()
default_sampling_params._setup(tokenizer)
default_sampling_params.stop = None
model_input = ModelInput.Tokens

# Set model type based on disaggregation mode for unified frontend support
Expand Down
6 changes: 4 additions & 2 deletions container/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
# Please check https://github.com/ai-dynamo/dynamo/pull/1065
# for details and reproducer to manually test if the image
# can be updated to later versions.
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
ARG BASE_IMAGE_TAG="25.09-cuda13.0-devel-ubuntu24.04"

# Build configuration
ARG ENABLE_KVBM=false
Expand Down Expand Up @@ -53,7 +53,7 @@ FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base
# Redeclare ARGs for this stage
ARG ARCH
ARG ARCH_ALT
ARG PYTHON_VERSION
ARG PYTHON_VERSION=3.12
ARG USE_SCCACHE
ARG SCCACHE_BUCKET
ARG SCCACHE_REGION
Expand Down Expand Up @@ -410,6 +410,8 @@ COPY --chown=dynamo: --from=wheel_builder $CARGO_HOME $CARGO_HOME

COPY --chown=dynamo: ./ /workspace/

# Install Python packages
# Install dynamo, NIXL, and dynamo-specific dependencies
RUN uv pip install \
/opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
/opt/dynamo/wheelhouse/ai_dynamo*any.whl \
Expand Down
406 changes: 82 additions & 324 deletions container/Dockerfile.trtllm

Large diffs are not rendered by default.

364 changes: 364 additions & 0 deletions container/Dockerfile.trtllm-cuda12

Large diffs are not rendered by default.

28 changes: 16 additions & 12 deletions container/Dockerfile.vllm
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,18 @@
# SPDX-License-Identifier: Apache-2.0

ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
# TODO OPS-612: NCCL will hang with 25.03, so use 25.01 for now
# Please check https://github.com/ai-dynamo/dynamo/pull/1065
# for details and reproducer to manually test if the image
# can be updated to later versions.
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
ARG BASE_IMAGE_TAG="25.09-cuda13.0-devel-ubuntu24.04"
ARG RELEASE_BUILD
ARG ENABLE_KVBM=false
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
ARG CUDA_VERSION="12.8"
ARG RUNTIME_IMAGE_TAG="13.0.1-runtime-ubuntu24.04"
ARG CUDA_VERSION="13.0"

# Make sure to update the dependency version in pyproject.toml when updating this
ARG VLLM_REF="v0.11.0"
ARG VLLM_REF="v0.11.1rc2"
# The FlashInfer ref is only respected when building vLLM from source, i.e. when VLLM_REF does not start with 'v' or for arm64 builds
ARG FLASHINF_REF="v0.3.1"
ARG TORCH_BACKEND="cu128"
ARG FLASHINF_REF="v0.4.1"
ARG TORCH_BACKEND="cu130"

# If left blank, then we will fallback to vLLM defaults
ARG DEEPGEMM_REF=""
Expand Down Expand Up @@ -81,6 +78,7 @@ RUN apt-get update -y \
ibverbs-utils \
libibumad-dev \
libibverbs-dev \
libmlx5-1 \
libnuma-dev \
librdmacm-dev \
rdma-core \
Expand Down Expand Up @@ -143,7 +141,13 @@ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
chmod +x /tmp/install_vllm.sh && \
/tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt ${DEEPGEMM_REF:+--deepgemm-ref "$DEEPGEMM_REF"} ${FLASHINF_REF:+--flashinf-ref "$FLASHINF_REF"} --torch-backend $TORCH_BACKEND --cuda-version $CUDA_VERSION && \
/tmp/install_vllm.sh --editable \
--vllm-ref $VLLM_REF \
--max-jobs $MAX_JOBS \
--arch $ARCH \
--installation-dir /opt ${DEEPGEMM_REF:+--deepgemm-ref "$DEEPGEMM_REF"} ${FLASHINF_REF:+--flashinf-ref "$FLASHINF_REF"} \
--torch-backend $TORCH_BACKEND \
--cuda-version $CUDA_VERSION && \
/tmp/use-sccache.sh show-stats "vLLM";

ENV LD_LIBRARY_PATH=\
Expand Down Expand Up @@ -206,7 +210,7 @@ RUN apt-get update && \
# prometheus dependencies
ca-certificates \
# DeepGemm uses 'cuobjdump' which does not come with CUDA image
cuda-command-line-tools-12-8 && \
cuda-command-line-tools-13-0 && \
rm -rf /var/lib/apt/lists/*

# Copy CUDA development tools (nvcc, headers, dependencies, etc.) from base devel image
Expand Down
30 changes: 19 additions & 11 deletions container/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ BUILD_CONTEXT=$(dirname "$(readlink -f "$SOURCE_DIR")")

# Base Images
TRTLLM_BASE_IMAGE=nvcr.io/nvidia/pytorch
TRTLLM_BASE_IMAGE_TAG=25.06-py3
TRTLLM_BASE_IMAGE_TAG=25.08-py3

# Important Note: Because of ABI compatibility issues between TensorRT-LLM and NGC PyTorch,
# we need to build the TensorRT-LLM wheel from source.
Expand Down Expand Up @@ -89,16 +89,17 @@ DEFAULT_TENSORRTLLM_PIP_WHEEL_DIR="/tmp/trtllm_wheel/"
# TensorRT-LLM commit to use for building the trtllm wheel if not provided.
# Important Note: This commit is not used in our CI pipeline. See the CI
# variables to learn how to run a pipeline with a specific commit.
DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT="0c9430e5a530ba958fc9dca561a3ad865ad9f492"
DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT="736e7ee136e0d65f98704db13ab7e053803033c4" # tag v1.2.0rc1
TRTLLM_COMMIT=""
TRTLLM_USE_NIXL_KVCACHE_EXPERIMENTAL="0"
TRTLLM_GIT_URL=""

# TensorRT-LLM PyPI index URL
DEFAULT_TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple"
DEFAULT_TENSORRTLLM_INDEX_URL="https://download.pytorch.org/whl/cu130"
# TODO: Remove the version specification from here and use the ai-dynamo[trtllm] package.
# Need to update the Dockerfile.trtllm to use the ai-dynamo[trtllm] package.
DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.1.0rc5"
DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.2.0rc1"
TENSORRTLLM_INDEX_URL=""
TENSORRTLLM_PIP_WHEEL=""


Expand All @@ -107,13 +108,13 @@ VLLM_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
# Please check https://github.com/ai-dynamo/dynamo/pull/1065
# for details and reproducer to manually test if the image
# can be updated to later versions.
VLLM_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
VLLM_BASE_IMAGE_TAG="25.09-cuda13.0-devel-ubuntu24.04"

NONE_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
NONE_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
NONE_BASE_IMAGE_TAG="25.09-cuda13.0-devel-ubuntu24.04"

SGLANG_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
SGLANG_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
SGLANG_BASE_IMAGE_TAG="25.09-cuda13.0-devel-ubuntu24.04"

NIXL_REF=0.7.1
NIXL_UCX_REF=v1.19.0
Expand Down Expand Up @@ -566,7 +567,7 @@ build_local_dev_with_header() {
set -x
fi

$RUN_PREFIX docker build \
$RUN_PREFIX docker build --progress=plain \
--build-arg DEV_BASE="$dev_base_image" \
--build-arg USER_UID="$USER_UID" \
--build-arg USER_GID="$USER_GID" \
Expand Down Expand Up @@ -848,15 +849,22 @@ if [[ -z "${DEV_IMAGE_INPUT:-}" ]]; then
echo "======================================"
echo "Starting Build 1: Base Image"
echo "======================================"
$RUN_PREFIX docker build -f "${SOURCE_DIR}/Dockerfile" --target dev $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO --tag $DYNAMO_BASE_IMAGE $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE
# Build 1 (container/Dockerfile) does NOT use (will be removed soon):
# - FRAMEWORK
# - VLLM_FRAMEWORK (or TRTLLM_FRAMEWORK, SGLANG_FRAMEWORK, etc.)
# - VERSION
# - PYTHON_PACKAGE_VERSION
# - HF_TOKEN
# - MAX_JOBS
$RUN_PREFIX docker build --progress=plain -f "${SOURCE_DIR}/Dockerfile" --target dev $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO --tag $DYNAMO_BASE_IMAGE $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE
# Start framework build
echo "======================================"
echo "Starting Build 2: Framework Image"
echo "======================================"
BUILD_ARGS+=" --build-arg DYNAMO_BASE_IMAGE=${DYNAMO_BASE_IMAGE}"
$RUN_PREFIX docker build -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE
$RUN_PREFIX docker build --progress=plain -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE
else
$RUN_PREFIX docker build -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE
$RUN_PREFIX docker build --progress=plain -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE
fi
fi

Expand Down
2 changes: 1 addition & 1 deletion container/deps/requirements.standard.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.

ucx-py-cu12
ucx-py-cu13
14 changes: 7 additions & 7 deletions container/deps/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

accelerate==1.6.0
aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@5554d2eb8206738c66048bf2d72183e9bcd85759
--extra-index-url https://download.pytorch.org/whl/cu130 # this is only needed for accelerate dependencies
accelerate
# aiconfigurator # @ git+https://github.com/ai-dynamo/aiconfigurator.git@5554d2eb8206738c66048bf2d72183e9bcd85759
aiofiles
aiperf @ git+https://github.com/ai-dynamo/aiperf.git@4d3fa29403c8f75da22a14f1f7b3aeb27db9288f
# aiperf # @ git+https://github.com/ai-dynamo/aiperf.git@4d3fa29403c8f75da22a14f1f7b3aeb27db9288f
av==15.0.0
fastapi==0.120.1
ftfy
Expand All @@ -17,7 +17,7 @@ kubernetes_asyncio
matplotlib
msgspec
mypy
nvidia-ml-py==13.580.65
nvidia-ml-py==13.580.82
opentelemetry-api
opentelemetry-sdk
pip
Expand All @@ -33,8 +33,8 @@ PyYAML
scikit-learn
scipy<1.14.0 # Pin scipy version for pmdarima compatibility
sentencepiece
tensorboard==2.19.0
tensorboardX==2.6.2.2
tensorboard==2.20.0
tensorboardX==2.6.4
transformers
types-aiofiles
types-PyYAML
Expand Down
4 changes: 2 additions & 2 deletions container/deps/trtllm/install_nixl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ set -ex

GITHUB_URL="https://github.com"

UCX_VERSION="v1.18.1"
UCX_VERSION="v1.19.0" # suggested by Ovidiu Mara
UCX_INSTALL_PATH="/usr/local/ucx/"
CUDA_PATH="/usr/local/cuda"

NIXL_COMMIT="16348080f5bdeb9fe6058a23be140cec020ef3f3"
NIXL_COMMIT="9ada51f154cc3bedcf94b3a3fcdea6e9b4117284" # suggested by Ovidiu Mara

UCX_REPO="https://github.com/openucx/ucx.git"
NIXL_REPO="https://github.com/ai-dynamo/nixl.git"
Expand Down
9 changes: 5 additions & 4 deletions container/deps/vllm/install_vllm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,18 @@

set -euo pipefail

VLLM_REF="v0.11.0"
VLLM_REF="v0.11.1rc1"

# Basic Configurations
ARCH=$(uname -m)
MAX_JOBS=16
INSTALLATION_DIR=/tmp

# VLLM and Dependency Configurations
TORCH_BACKEND="cu128"
TORCH_BACKEND="cu130"
TORCH_CUDA_ARCH_LIST="9.0;10.0" # For EP Kernels
DEEPGEMM_REF=""
CUDA_VERSION="12.8" # For DEEPGEMM
CUDA_VERSION="13.0" # For DEEPGEMM

# These flags are applicable when installing vLLM from source code
EDITABLE=true
Expand Down Expand Up @@ -146,6 +146,7 @@ else
# VLLM_REF does not start with 'v' or amd64 - use git checkout path
if [ "$ARCH" = "arm64" ]; then

# TODO: update comments for torch 2.9.0
# torch 2.8.0 doesn't have an aarch64 wheel for cu128; vLLM uses torch 2.8.0 nightly wheel builds to compile its aarch64 wheel against
# nightly can be unstable so we will not use it here
# for now we will use torch 2.7.1+cu128 but this requires a recompilation from source
Expand All @@ -154,7 +155,7 @@ else

# Try to install specific PyTorch version first
echo "Attempting to install pinned PyTorch nightly versions..."
if ! uv pip install torch==2.7.1+cu128 torchaudio==2.7.1 torchvision==0.22.1 --index-url https://download.pytorch.org/whl/cu128; then
if ! uv pip install torch==2.9.0+cu130 torchaudio==2.9.0+cu130 torchvision==0.24.0+cu130 --index-url https://download.pytorch.org/whl/cu130; then
echo "Pinned versions failed"
exit 1
fi
Expand Down
Loading