diff --git a/components/src/dynamo/trtllm/main.py b/components/src/dynamo/trtllm/main.py index 3a8ec5c6bf..fec4052bed 100644 --- a/components/src/dynamo/trtllm/main.py +++ b/components/src/dynamo/trtllm/main.py @@ -183,7 +183,6 @@ async def init(runtime: DistributedRuntime, config: Config): "pipeline_parallel_size": config.pipeline_parallel_size, "moe_expert_parallel_size": config.expert_parallel_size, "backend": "pytorch", - "skip_tokenizer_init": True, "build_config": build_config, "kv_cache_config": kv_cache_config, "gpus_per_node": gpus_per_node, @@ -240,8 +239,6 @@ async def init(runtime: DistributedRuntime, config: Config): # Populate default sampling params from the model tokenizer = tokenizer_factory(arg_map["model"]) default_sampling_params = SamplingParams() - default_sampling_params._setup(tokenizer) - default_sampling_params.stop = None model_input = ModelInput.Tokens # Set model type based on disaggregation mode for unified frontend support diff --git a/container/Dockerfile b/container/Dockerfile index f91d7720bf..91af71a4e7 100644 --- a/container/Dockerfile +++ b/container/Dockerfile @@ -12,7 +12,7 @@ ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base" # Please check https://github.com/ai-dynamo/dynamo/pull/1065 # for details and reproducer to manually test if the image # can be updated to later versions. 
-ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04" +ARG BASE_IMAGE_TAG="25.09-cuda13.0-devel-ubuntu24.04" # Build configuration ARG ENABLE_KVBM=false @@ -53,7 +53,7 @@ FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base # Redeclare ARGs for this stage ARG ARCH ARG ARCH_ALT -ARG PYTHON_VERSION +ARG PYTHON_VERSION=3.12 ARG USE_SCCACHE ARG SCCACHE_BUCKET ARG SCCACHE_REGION @@ -410,6 +410,8 @@ COPY --chown=dynamo: --from=wheel_builder $CARGO_HOME $CARGO_HOME COPY --chown=dynamo: ./ /workspace/ +# Install Python packages +# Install dynamo, NIXL, and dynamo-specific dependencies RUN uv pip install \ /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \ /opt/dynamo/wheelhouse/ai_dynamo*any.whl \ diff --git a/container/Dockerfile.trtllm b/container/Dockerfile.trtllm index 9768b402cb..beb2675719 100644 --- a/container/Dockerfile.trtllm +++ b/container/Dockerfile.trtllm @@ -1,20 +1,14 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base" -ARG BASE_IMAGE_TAG="25.06-cuda12.9-devel-ubuntu24.04" - -ARG PYTORCH_BASE_IMAGE="nvcr.io/nvidia/pytorch" -ARG PYTORCH_BASE_IMAGE_TAG="25.06-py3" +ARG BASE_IMAGE="nvcr.io/nvidia/pytorch" +ARG BASE_IMAGE_TAG="25.08-py3" # TODO: test on 25.09-py3 +ARG RELEASE_BUILD ARG ENABLE_KVBM=false -ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda" -ARG RUNTIME_IMAGE_TAG="12.9.1-runtime-ubuntu24.04" - -# TensorRT-LLM specific configuration -ARG HAS_TRTLLM_CONTEXT=0 -ARG TENSORRTLLM_PIP_WHEEL="tensorrt-llm" -ARG TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple" -ARG GITHUB_TRTLLM_COMMIT +# ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda" +# ARG RUNTIME_IMAGE_TAG="13.0.0-runtime-ubuntu24.04" # TODO: test on 13.0.1 +ARG RUNTIME_IMAGE="nvcr.io/nvidia/tensorrt-llm/release" +ARG RUNTIME_IMAGE_TAG="1.2.0rc1" # TODO: check for 1.2.0rc2 # Define general architecture ARGs for supporting both x86 and aarch64 builds. 
# ARCH: Used for package suffixes (e.g., amd64, arm64) @@ -37,126 +31,9 @@ ARG DYNAMO_BASE_IMAGE="dynamo:latest-none" FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base # Copy artifacts from NGC PyTorch image -FROM ${PYTORCH_BASE_IMAGE}:${PYTORCH_BASE_IMAGE_TAG} AS pytorch_base - -################################################## -########## Framework Builder Stage ############## -################################################## -# -# PURPOSE: Build TensorRT-LLM with root privileges -# -# This stage handles TensorRT-LLM installation which requires: -# - Root access for apt operations (CUDA repos, TensorRT installation) -# - System-level modifications in install_tensorrt.sh -# - Virtual environment population with PyTorch and TensorRT-LLM -# -# The completed venv is then copied to runtime stage with dynamo ownership - FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS framework -ARG ARCH_ALT -ARG PYTHON_VERSION -ARG ENABLE_KVBM -ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl -ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu -ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins -ENV VIRTUAL_ENV=/opt/dynamo/venv -ENV PATH="${VIRTUAL_ENV}/bin:${PATH}" - -# Install minimal dependencies needed for TensorRT-LLM installation -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ - python${PYTHON_VERSION}-dev \ - python3-pip \ - curl \ - git \ - git-lfs \ - ca-certificates && \ - rm -rf /var/lib/apt/lists/* - -# Copy uv -COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ - -# Create virtual environment -RUN mkdir -p /opt/dynamo/venv && \ - uv venv /opt/dynamo/venv --python $PYTHON_VERSION - -# Copy pytorch installation from NGC PyTorch -ARG TORCH_VER=2.8.0a0+5228986c39.nv25.6 -ARG TORCHVISION_VER=0.22.0a0+95f10a4e -ARG SETUPTOOLS_VER=78.1.1 -ARG PYTORCH_TRITON_VER=3.3.0+git96316ce52.nvinternal -ARG JINJA2_VER=3.1.6 -ARG NETWORKX_VER=3.5 -ARG SYMPY_VER=1.14.0 -ARG PACKAGING_VER=23.2 -ARG FLASH_ATTN_VER=2.7.4.post1 -ARG MPMATH_VER=1.3.0 - -COPY 
--from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torch -COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch-${TORCH_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torch-${TORCH_VER}.dist-info -COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchgen ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchgen -COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision -COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision-${TORCHVISION_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision-${TORCHVISION_VER}.dist-info -COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision.libs ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision.libs -COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/functorch ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/functorch -COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/jinja2 ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/jinja2 -COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/jinja2-${JINJA2_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/jinja2-${JINJA2_VER}.dist-info -COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/sympy ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/sympy -COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/sympy-${SYMPY_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/sympy-${SYMPY_VER}.dist-info -COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn 
${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/flash_attn -COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/flash_attn-${FLASH_ATTN_VER}.dist-info -COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn_2_cuda.cpython-*-*-linux-gnu.so ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/ -COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/triton ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/triton -COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info - -# Install TensorRT-LLM and related dependencies -ARG HAS_TRTLLM_CONTEXT -ARG TENSORRTLLM_PIP_WHEEL -ARG TENSORRTLLM_INDEX_URL -ARG GITHUB_TRTLLM_COMMIT - -# Copy only wheel files and commit info from trtllm_wheel stage from build_context -COPY --from=trtllm_wheel /*.whl /trtllm_wheel/ -COPY --from=trtllm_wheel /*.txt /trtllm_wheel/ - -# NOTE: locking cuda-python version to <13 to avoid breaks with tensorrt-llm 1.0.0rc6. -RUN uv pip install "cuda-python>=12,<13" - -# Note: TensorRT needs to be uninstalled before installing the TRTLLM wheel -# because there might be mismatched versions of TensorRT between the NGC PyTorch -# and the TRTLLM wheel. 
-RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \ - # Clean up any existing conflicting CUDA repository configurations and GPG keys - rm -f /etc/apt/sources.list.d/cuda*.list && \ - rm -f /usr/share/keyrings/cuda-archive-keyring.gpg && \ - rm -f /etc/apt/trusted.gpg.d/cuda*.gpg -RUN if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \ - # Download and run install_tensorrt.sh from TensorRT-LLM GitHub before installing the wheel - curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/${GITHUB_TRTLLM_COMMIT}/docker/common/install_tensorrt.sh" && \ - # Modify the script to use virtual environment pip instead of system pip3 - sed -i 's/pip3 install/uv pip install/g' /tmp/install_tensorrt.sh && \ - bash /tmp/install_tensorrt.sh && \ - # Install from local wheel directory in build context - WHEEL_FILE="$(find /trtllm_wheel -name "*.whl" | head -n 1)"; \ - if [ -n "$WHEEL_FILE" ]; then \ - uv pip install "$WHEEL_FILE"; \ - else \ - echo "No wheel file found in /trtllm_wheel directory."; \ - exit 1; \ - fi; \ - else \ - # Download and run install_tensorrt.sh from TensorRT-LLM GitHub before installing the wheel - TRTLLM_VERSION=$(echo "${TENSORRTLLM_PIP_WHEEL}" | sed -n 's/.*==\([0-9a-zA-Z\.\-]*\).*/\1/p') && \ - (curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/v${TRTLLM_VERSION}/docker/common/install_tensorrt.sh" || \ - curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/${GITHUB_TRTLLM_COMMIT}/docker/common/install_tensorrt.sh") && \ - # Modify the script to use virtual environment pip instead of system pip3 - sed -i 's/pip3 install/uv pip install/g' /tmp/install_tensorrt.sh && \ - bash /tmp/install_tensorrt.sh && \ - # Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI - uv pip install 
--extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}"; \ - fi ################################################## ########## Runtime Image ######################## @@ -178,40 +55,21 @@ RUN if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \ # FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime - -ARG ARCH_ALT -ARG ENABLE_KVBM -ARG PYTHON_VERSION - +ARG PYTHON_VERSION=3.12 WORKDIR /workspace +# ENV VIRTUAL_ENV=/opt/dynamo/venv -ENV ENV=${ENV:-/etc/shinit_v2} -ENV VIRTUAL_ENV=/opt/dynamo/venv +# probably not going to work - TODO test this +# ENV VIRTUAL_ENV=/usr/local/lib/python3.12/dist-packages + +ARG ARCH_ALT ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins -ARG DYNAMO_COMMIT_SHA -ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA - # Install Python, build-essential and python3-dev as apt dependencies RUN apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ - # Build tools - build-essential \ - g++ \ - ninja-build \ - git \ - git-lfs \ - # Python runtime - CRITICAL for virtual environment to work - python${PYTHON_VERSION}-dev \ - python3-pip \ - # jq for polling various endpoints and health checks - jq \ - # CUDA/ML libraries - libcudnn9-cuda-12 \ - # Network and communication libraries - libzmq3-dev \ # RDMA/UCX libraries required to find RDMA devices ibverbs-providers \ ibverbs-utils \ @@ -222,70 +80,25 @@ RUN apt-get update && \ rdma-core \ # OpenMPI dependencies openssh-client \ - openssh-server \ - # System utilities and dependencies - curl && \ - apt-get clean && \ + openssh-server && \ rm -rf /var/lib/apt/lists/* -# Copy CUDA development tools (nvcc, headers, dependencies, etc.) 
from PyTorch base image -COPY --from=pytorch_base /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc -COPY --from=pytorch_base /usr/local/cuda/bin/cudafe++ /usr/local/cuda/bin/cudafe++ -COPY --from=pytorch_base /usr/local/cuda/bin/ptxas /usr/local/cuda/bin/ptxas -COPY --from=pytorch_base /usr/local/cuda/bin/fatbinary /usr/local/cuda/bin/fatbinary -COPY --from=pytorch_base /usr/local/cuda/include/ /usr/local/cuda/include/ -COPY --from=pytorch_base /usr/local/cuda/nvvm /usr/local/cuda/nvvm -COPY --from=pytorch_base /usr/local/cuda/lib64/libcudart.so* /usr/local/cuda/lib64/ -COPY --from=pytorch_base /usr/local/cuda/lib64/libcupti* /usr/local/cuda/lib64/ -COPY --from=pytorch_base /usr/local/lib/lib* /usr/local/lib/ - +### COPY NATS & ETCD ### # Copy nats and etcd from dynamo_base image COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server -COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/ +# etcd is already in the base trtllm image +# COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/ # Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH -# Copy OpenMPI from PyTorch base image -COPY --from=pytorch_base /opt/hpcx/ompi /opt/hpcx/ompi -# Copy NUMA library from PyTorch base image -COPY --from=pytorch_base /usr/lib/${ARCH_ALT}-linux-gnu/libnuma.so* /usr/lib/${ARCH_ALT}-linux-gnu/ - -# Copy UCX libraries, libucc.so is needed by pytorch. May not need to copy whole hpcx dir but only /opt/hpcx/ucc/ -COPY --from=pytorch_base /opt/hpcx /opt/hpcx -# This is needed to make libucc.so visible so pytorch can use it. -ENV LD_LIBRARY_PATH="/opt/hpcx/ucc/lib:${LD_LIBRARY_PATH}" -# Might not need to copy cusparseLt in the future once it's included in DLFW cuda container -# networkx, packaging, setuptools get overridden by trtllm installation, so not copying them -# pytorch-triton is copied after trtllm installation. 
-COPY --from=pytorch_base /usr/local/cuda/lib64/libcusparseLt* /usr/local/cuda/lib64/ - -# Copy uv to system /bin -COPY --from=framework /bin/uv /bin/uvx /bin/ - -# Copy libgomp.so from framework image -COPY --from=framework /usr/local/tensorrt /usr/local/tensorrt -COPY --from=framework /usr/lib/${ARCH_ALT}-linux-gnu/libgomp.so* /usr/lib/${ARCH_ALT}-linux-gnu/ - -# Create dynamo user with group 0 for OpenShift compatibility -RUN userdel -r ubuntu > /dev/null 2>&1 || true \ - && useradd -m -s /bin/bash -g 0 dynamo \ - && [ `id -u dynamo` -eq 1000 ] \ - && mkdir -p /home/dynamo/.cache /opt/dynamo \ - && chown -R dynamo: /workspace /home/dynamo /opt/dynamo \ - && chmod -R g+w /workspace /home/dynamo/.cache /opt/dynamo - -# Switch to dynamo user -USER dynamo -ENV HOME=/home/dynamo -ENV DYNAMO_HOME=/workspace - -# Copy UCX from framework image as plugin for NIXL -# Copy NIXL source from framework image +# Copy UCX from dynamo_base image as plugin for NIXL +# Copy NIXL source from dynamo_base image # Copy dynamo wheels for gitlab artifacts -COPY --chown=dynamo: --from=dynamo_base /usr/local/ucx /usr/local/ucx -COPY --chown=dynamo: --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX +COPY --from=dynamo_base /usr/local/ucx /usr/local/ucx +COPY --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX +ENV PATH=/usr/local/ucx/bin:$PATH -ENV PATH="/usr/local/ucx/bin:${VIRTUAL_ENV}/bin:/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/bin:/usr/local/cuda/nvvm/bin:$PATH" +ENV DYNAMO_HOME=/workspace ENV LD_LIBRARY_PATH=\ $NIXL_LIB_DIR:\ $NIXL_PLUGIN_DIR:\ @@ -293,129 +106,74 @@ $NIXL_PLUGIN_DIR:\ /usr/local/ucx/lib/ucx:\ /opt/hpcx/ompi/lib:\ $LD_LIBRARY_PATH +# ENV PATH="${VIRTUAL_ENV}/bin:/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/bin:/usr/local/cuda/nvvm/bin:$PATH" ENV OPAL_PREFIX=/opt/hpcx/ompi -# Copy pre-built venv with PyTorch and TensorRT-LLM from framework stage -COPY --chown=dynamo: --from=framework ${VIRTUAL_ENV} ${VIRTUAL_ENV} - -ENV 
TENSORRT_LIB_DIR=/usr/local/tensorrt/targets/${ARCH_ALT}-linux-gnu/lib -ENV LD_LIBRARY_PATH=${TENSORRT_LIB_DIR}:${LD_LIBRARY_PATH} - -# Install dynamo, NIXL, and dynamo-specific dependencies -COPY --chown=dynamo: benchmarks/ /opt/dynamo/benchmarks/ -COPY --chown=dynamo: --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/ -RUN uv pip install \ - /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \ - /opt/dynamo/wheelhouse/ai_dynamo*any.whl \ - /opt/dynamo/wheelhouse/nixl/nixl*.whl \ - && if [ "${ENABLE_KVBM}" = "true" ]; then \ - uv pip install /opt/dynamo/wheelhouse/kvbm*.whl; \ - fi \ - && cd /opt/dynamo/benchmarks \ - && UV_GIT_LFS=1 uv pip install --no-cache . \ - && cd - \ - && rm -rf /opt/dynamo/benchmarks - -# Install common and test dependencies -RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \ - --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \ - UV_GIT_LFS=1 uv pip install \ - --no-cache \ - --requirement /tmp/requirements.txt \ - --requirement /tmp/requirements.test.txt - -# Copy tests, benchmarks, deploy and components for CI with correct ownership -COPY --chown=dynamo: tests /workspace/tests -COPY --chown=dynamo: examples /workspace/examples -COPY --chown=dynamo: benchmarks /workspace/benchmarks -COPY --chown=dynamo: deploy /workspace/deploy -COPY --chown=dynamo: components/ /workspace/components/ -COPY --chown=dynamo: recipes/ /workspace/recipes/ - -# Copy attribution files with correct ownership -COPY --chown=dynamo: ATTRIBUTION* LICENSE /workspace/ +### VIRTUAL ENVIRONMENT SETUP ### -# Setup launch banner in common directory accessible to all users -RUN --mount=type=bind,source=./container/launch_message/runtime.txt,target=/opt/dynamo/launch_message.txt \ - sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen - -# Setup environment for all users -USER root -RUN chmod 755 /opt/dynamo/.launch_screen && \ - echo 'source 
/opt/dynamo/venv/bin/activate' >> /etc/bash.bashrc && \ - echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc - -USER dynamo - -ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] -CMD [] - -########################################################### -########## Development (run.sh, runs as root user) ######## -########################################################### -# -# PURPOSE: Local development environment for use with run.sh (not Dev Container plug-in) -# -# This stage runs as root and provides: -# - Development tools and utilities for local debugging -# - Support for vscode/cursor development outside the Dev Container plug-in -# -# Use this stage if you need a full-featured development environment with extra tools, -# but do not use it with the Dev Container plug-in. +COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ +# Create virtual environment +# RUN mkdir -p /opt/dynamo/venv && \ +# uv venv /opt/dynamo/venv --python $PYTHON_VERSION -FROM runtime AS dev +# Activate virtual environment +# ENV VIRTUAL_ENV=/opt/dynamo/venv \ +# PATH="/opt/dynamo/venv/bin:${PATH}" -# Don't want ubuntu to be editable, just change uid and gid. 
-ARG WORKSPACE_DIR=/workspace +ENV ENV=${ENV:-/etc/shinit_v2} -# Switch to root for system package installation -USER root -# Install utilities as root -RUN apt-get update -y && \ - apt-get install -y --no-install-recommends \ - # Install utilities - nvtop \ - wget \ - tmux \ - vim \ - git \ - iproute2 \ - rsync \ - zip \ - unzip \ - htop \ - # Build Dependencies - autoconf \ - automake \ - cmake \ - libtool \ - meson \ - net-tools \ - pybind11-dev \ - # Rust build dependencies - clang \ - libclang-dev \ - protobuf-compiler && \ - rm -rf /var/lib/apt/lists/* +ENV TENSORRT_LIB_DIR=/usr/local/tensorrt/targets/${ARCH_ALT}-linux-gnu/lib +ENV LD_LIBRARY_PATH=${TENSORRT_LIB_DIR}:${LD_LIBRARY_PATH} -# Set workspace directory variable -ENV WORKSPACE_DIR=${WORKSPACE_DIR} \ - DYNAMO_HOME=${WORKSPACE_DIR} \ - RUSTUP_HOME=/usr/local/rustup \ - CARGO_HOME=/usr/local/cargo \ - CARGO_TARGET_DIR=/workspace/target \ - VIRTUAL_ENV=/opt/dynamo/venv \ - PATH=/usr/local/cargo/bin:$PATH +# Install dynamo, NIXL, and dynamo-specific dependencies +COPY benchmarks/ /opt/dynamo/benchmarks/ +COPY --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/ +RUN cd /opt/dynamo/wheelhouse/ && \ + ls -lah && \ + ls -lah nixl/ && \ + pip list &&\ + # TRTLLM's NIXL installation doesn't include NIXL python package, so install it from wheelhouse + pip install --break-system-packages --no-cache \ + ai_dynamo_runtime-*.whl \ + ai_dynamo-*.whl \ + nixl/nixl-*.whl &&\ + pip list + # TODO: install benchmarks + # && cd /opt/dynamo/benchmarks \ + # && UV_GIT_LFS=1 uv pip install --no-cache --system --break-system-packages . 
\ + # && uv pip list \ + # && cd - \ + # && rm -rf /opt/dynamo/benchmarks + +# TODO: Install common and test dependencies +# RUN uv pip list +# RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \ +# --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \ +# UV_GIT_LFS=1 uv pip install \ +# --system --break-system-packages \ +# --no-cache \ +# --requirement /tmp/requirements.txt \ +# --requirement /tmp/requirements.test.txt +# RUN uv pip list +# This is needed to make libucc.so visible so pytorch can use it. +ENV LD_LIBRARY_PATH="/opt/hpcx/ucc/lib:${LD_LIBRARY_PATH}" -COPY --from=dynamo_base /usr/local/rustup /usr/local/rustup -COPY --from=dynamo_base /usr/local/cargo /usr/local/cargo +# Copy tests, benchmarks, deploy and components for CI +COPY tests /workspace/tests +COPY examples /workspace/examples +COPY benchmarks /workspace/benchmarks +COPY deploy /workspace/deploy +COPY components/ /workspace/components/ -# Install maturin, for maturin develop -RUN uv pip install maturin[patchelf] +# Copy attribution files +COPY ATTRIBUTION* LICENSE /workspace/ +# Copy launch banner +RUN --mount=type=bind,source=./container/launch_message/runtime.txt,target=/workspace/launch_message.txt \ + sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \ + echo "cat ~/.launch_screen" >> ~/.bashrc + # echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc -# Editable install of dynamo -COPY pyproject.toml README.md hatch_build.py /workspace/ -RUN uv pip install --no-deps -e . -CMD [] +ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] +CMD [] \ No newline at end of file diff --git a/container/Dockerfile.trtllm-cuda12 b/container/Dockerfile.trtllm-cuda12 new file mode 100644 index 0000000000..23ddda454a --- /dev/null +++ b/container/Dockerfile.trtllm-cuda12 @@ -0,0 +1,364 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +ARG BASE_IMAGE="nvcr.io/nvidia/pytorch" +ARG BASE_IMAGE_TAG="25.08-py3" # TODO: test on 25.09-py3 +ARG RELEASE_BUILD +ARG ENABLE_KVBM=false +ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda" +ARG RUNTIME_IMAGE_TAG="13.0.0-runtime-ubuntu24.04" # TODO: test on 13.0.1 + +# TensorRT-LLM specific configuration +ARG HAS_TRTLLM_CONTEXT=0 +ARG TENSORRTLLM_PIP_WHEEL="tensorrt-llm" +ARG TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple" +ARG GITHUB_TRTLLM_COMMIT + +# Define general architecture ARGs for supporting both x86 and aarch64 builds. +# ARCH: Used for package suffixes (e.g., amd64, arm64) +# ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64) +# +# Default values are for x86/amd64: +# --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64 +# +# For arm64/aarch64, build with: +# --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64 +# +# NOTE: There isn't an easy way to define one of these values based on the other value +# without adding if statements everywhere, so just define both as ARGs for now. 
+ARG ARCH=amd64 +ARG ARCH_ALT=x86_64 +# Python configuration +ARG PYTHON_VERSION=3.12 + +ARG DYNAMO_BASE_IMAGE="dynamo:latest-none" +FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base + +# Copy artifacts from NGC PyTorch image +FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS framework + + + +################################################## +########## Runtime Image ######################## +################################################## +# +# PURPOSE: Production runtime environment +# +# This stage creates a lightweight production-ready image containing: +# - Pre-compiled TensorRT-LLM and framework dependencies +# - Dynamo runtime libraries and Python packages +# - Essential runtime dependencies and configurations +# - Optimized for inference workloads and deployment +# +# Use this stage when you need: +# - Production deployment of Dynamo with TensorRT-LLM +# - Minimal runtime footprint without build tools +# - Ready-to-run inference server environment +# - Base for custom application containers +# + +FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime + +WORKDIR /workspace +ENV VIRTUAL_ENV=/opt/dynamo/venv + +ARG ARCH_ALT +ARG PYTHON_VERSION +ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl +ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu +ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins + +# Install Python, build-essential and python3-dev as apt dependencies +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + # Build tools (required for JIT kernel compilation) + build-essential \ + g++ \ + ninja-build \ + git \ + git-lfs \ + # Python runtime - CRITICAL for virtual environment to work + python${PYTHON_VERSION}-dev \ + python3-pip \ + # CUDA/ML libraries + libcudnn9-cuda-13 \ + # Network and communication libraries + libzmq3-dev \ + # RDMA/UCX libraries required to find RDMA devices + ibverbs-providers \ + ibverbs-utils \ + libibumad3 \ + libibverbs1 \ + libnuma1 \ + librdmacm1 \ + rdma-core \ + # OpenMPI dependencies + 
openssh-client \ + openssh-server \ + # System utilities + ca-certificates \ + curl \ + jq \ + wget && \ + rm -rf /var/lib/apt/lists/* + +# Copy CUDA development tools (nvcc, headers, dependencies, etc.) from framework devel image +COPY --from=framework /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc +COPY --from=framework /usr/local/cuda/bin/cudafe++ /usr/local/cuda/bin/cudafe++ +COPY --from=framework /usr/local/cuda/bin/ptxas /usr/local/cuda/bin/ptxas +COPY --from=framework /usr/local/cuda/bin/fatbinary /usr/local/cuda/bin/fatbinary +COPY --from=framework /usr/local/cuda/include/ /usr/local/cuda/include/ +COPY --from=framework /usr/local/cuda/nvvm /usr/local/cuda/nvvm +COPY --from=framework /usr/local/cuda/lib64/libcudart.so* /usr/local/cuda/lib64/ +COPY --from=framework /usr/local/cuda/lib64/libcupti* /usr/local/cuda/lib64/ +COPY --from=framework /usr/local/lib/lib* /usr/local/lib/ + +### COPY NATS & ETCD ### +# Copy nats and etcd from dynamo_base image +COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server +COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/ +# Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible +ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH + +# Copy UCX from framework image as plugin for NIXL +# Copy NIXL source from framework image +# Copy dynamo wheels for gitlab artifacts +COPY --from=dynamo_base /usr/local/ucx /usr/local/ucx +COPY --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX +ENV PATH=/usr/local/ucx/bin:$PATH + +# Copy OpenMPI from framework image +COPY --from=framework /opt/hpcx/ompi /opt/hpcx/ompi +# Copy NUMA library from framework image +COPY --from=framework /usr/lib/${ARCH_ALT}-linux-gnu/libnuma.so* /usr/lib/${ARCH_ALT}-linux-gnu/ + +ENV DYNAMO_HOME=/workspace +ENV LD_LIBRARY_PATH=\ +$NIXL_LIB_DIR:\ +$NIXL_PLUGIN_DIR:\ +/usr/local/ucx/lib:\ +/usr/local/ucx/lib/ucx:\ +/opt/hpcx/ompi/lib:\ +$LD_LIBRARY_PATH +ENV 
PATH="${VIRTUAL_ENV}/bin:/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/bin:/usr/local/cuda/nvvm/bin:$PATH" +ENV OPAL_PREFIX=/opt/hpcx/ompi + +### VIRTUAL ENVIRONMENT SETUP ### + +COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ +# Create virtual environment +RUN mkdir -p /opt/dynamo/venv && \ + uv venv /opt/dynamo/venv --python $PYTHON_VERSION + +# Activate virtual environment +ENV VIRTUAL_ENV=/opt/dynamo/venv \ + PATH="/opt/dynamo/venv/bin:${PATH}" + +# Copy pytorch installation from NGC PyTorch +ARG TORCH_VER=2.8.0a0+34c6371d24.nv25.8 +ARG TORCHVISION_VER=0.23.0a0+428a54c9 +ARG SETUPTOOLS_VER=78.1.1 +ARG PYTORCH_TRITON_VER=3.3.1+gitc8757738 +ARG JINJA2_VER=3.1.6 +ARG NETWORKX_VER=3.5 +ARG SYMPY_VER=1.14.0 +ARG PACKAGING_VER=23.2 +ARG FLASH_ATTN_VER=2.7.4.post1 +ARG MPMATH_VER=1.3.0 + +COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torch +COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch-${TORCH_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torch-${TORCH_VER}.dist-info +COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchgen ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchgen +COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision +COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision-${TORCHVISION_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision-${TORCHVISION_VER}.dist-info +COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision.libs ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision.libs +COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/functorch ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/functorch +COPY 
--from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/jinja2 ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/jinja2 +COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/jinja2-${JINJA2_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/jinja2-${JINJA2_VER}.dist-info +COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/sympy ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/sympy +COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/sympy-${SYMPY_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/sympy-${SYMPY_VER}.dist-info +COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/flash_attn +COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/flash_attn-${FLASH_ATTN_VER}.dist-info +COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn_2_cuda.cpython-*-*-linux-gnu.so ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/ +COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/triton ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/triton +COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info + + +ENV ENV=${ENV:-/etc/shinit_v2} + +# Install TensorRT-LLM and related dependencies +ARG HAS_TRTLLM_CONTEXT +ARG TENSORRTLLM_PIP_WHEEL +ARG TENSORRTLLM_INDEX_URL + +# Copy only wheel files and commit info from trtllm_wheel stage from build_context +COPY --from=trtllm_wheel /*.whl /trtllm_wheel/ +COPY --from=trtllm_wheel /*.txt /trtllm_wheel/ + +# Note: TensorRT needs to be uninstalled before installing 
the TRTLLM wheel +# because there might be mismatched versions of TensorRT between the NGC PyTorch +# and the TRTLLM wheel. +RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \ + # Clean up any existing conflicting CUDA repository configurations and GPG keys + rm -f /etc/apt/sources.list.d/cuda*.list && \ + rm -f /usr/share/keyrings/cuda-archive-keyring.gpg && \ + rm -f /etc/apt/trusted.gpg.d/cuda*.gpg && \ + if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \ + # Download and run install_tensorrt.sh from TensorRT-LLM GitHub before installing the wheel + TRTLLM_COMMIT=$(cat /trtllm_wheel/commit.txt | awk -F'_' '{print $2}') && \ + echo "Using TRTLLM_COMMIT: $TRTLLM_COMMIT" && \ + (curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/${TRTLLM_COMMIT}/docker/common/install_tensorrt.sh" || \ + curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/${GITHUB_TRTLLM_COMMIT}/docker/common/install_tensorrt.sh") && \ + # Modify the script to use virtual environment pip instead of system pip3 + sed -i 's/pip3 install/uv pip install/g' /tmp/install_tensorrt.sh && \ + cat /tmp/install_tensorrt.sh && \ + uv pip install patchelf===0.18.0 && \ + bash /tmp/install_tensorrt.sh && \ + # Install from local wheel directory in build context + WHEEL_FILE=$(find /trtllm_wheel -name "*.whl" | head -n 1); \ + if [ -n "$WHEEL_FILE" ]; then \ + uv pip install "$WHEEL_FILE"; \ + else \ + echo "No wheel file found in /trtllm_wheel directory."; \ + exit 1; \ + fi; \ + else \ + # Download and run install_tensorrt.sh from TensorRT-LLM GitHub before installing the wheel + TRTLLM_VERSION=$(echo "${TENSORRTLLM_PIP_WHEEL}" | sed -n 's/.*==\([0-9a-zA-Z\.\-]*\).*/\1/p') && \ + (curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh
"https://github.com/NVIDIA/TensorRT-LLM/raw/v${TRTLLM_VERSION}/docker/common/install_tensorrt.sh" || \ + curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/${GITHUB_TRTLLM_COMMIT}/docker/common/install_tensorrt.sh") && \ + # Modify the script to use virtual environment pip instead of system pip3 + sed -i 's/pip3 install/uv pip install/g' /tmp/install_tensorrt.sh && \ + uv pip list && \ + uv pip install patchelf===0.18.0 && \ + bash /tmp/install_tensorrt.sh && \ + uv pip list && \ + # Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI (chained with && so a failed install fails the build) + uv pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" && \ + uv pip list; \ + fi + +ENV TENSORRT_LIB_DIR=/usr/local/tensorrt/targets/${ARCH_ALT}-linux-gnu/lib +ENV LD_LIBRARY_PATH=${TENSORRT_LIB_DIR}:${LD_LIBRARY_PATH} + +# Install dynamo, NIXL, and dynamo-specific dependencies +COPY benchmarks/ /opt/dynamo/benchmarks/ +COPY --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/ +RUN uv pip list && \ + uv pip install \ + /opt/dynamo/wheelhouse/ai_dynamo_runtime*cp312*.whl \ + /opt/dynamo/wheelhouse/ai_dynamo*any.whl \ + /opt/dynamo/wheelhouse/nixl/nixl*.whl \ + && uv pip list \ + && cd /opt/dynamo/benchmarks \ + && UV_GIT_LFS=1 uv pip install --no-cache . \ + && uv pip list \ + && cd - \ + && rm -rf /opt/dynamo/benchmarks + +# Install common and test dependencies +RUN uv pip list +RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \ + --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \ + UV_GIT_LFS=1 uv pip install \ + --no-cache \ + --requirement /tmp/requirements.txt \ + --requirement /tmp/requirements.test.txt +RUN uv pip list +# Copy UCX libraries, libucc.so is needed by pytorch.
May not need to copy whole hpcx dir but only /opt/hpcx/ucc/ +COPY --from=framework /opt/hpcx /opt/hpcx +# This is needed to make libucc.so visible so pytorch can use it. +ENV LD_LIBRARY_PATH="/opt/hpcx/ucc/lib:${LD_LIBRARY_PATH}" +# Might not need to copy cusparseLt in the future once it's included in DLFW cuda container +# networkx, packaging, setuptools get overridden by trtllm installation, so not copying them +# pytorch-triton is copied after trtllm installation. +COPY --from=framework /usr/local/cuda/lib64/libcusparseLt* /usr/local/cuda/lib64/ + +# Copy tests, benchmarks, deploy and components for CI +COPY tests /workspace/tests +COPY examples /workspace/examples +COPY benchmarks /workspace/benchmarks +COPY deploy /workspace/deploy +COPY components/ /workspace/components/ + +# Copy attribution files +COPY ATTRIBUTION* LICENSE /workspace/ +# Copy launch banner +RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \ + sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \ + echo "cat ~/.launch_screen" >> ~/.bashrc && \ + echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc + + +ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] +CMD [] + +########################################################### +########## Development (run.sh, runs as root user) ######## +########################################################### +# +# PURPOSE: Local development environment for use with run.sh (not Dev Container plug-in) +# +# This stage runs as root and provides: +# - Development tools and utilities for local debugging +# - Support for vscode/cursor development outside the Dev Container plug-in +# +# Use this stage if you need a full-featured development environment with extra tools, +# but do not use it with the Dev Container plug-in. + +FROM runtime AS dev + +# Don't want ubuntu to be editable, just change uid and gid. 
+ARG WORKSPACE_DIR=/workspace + +# Install utilities as root +RUN apt-get update -y && \ + apt-get install -y --no-install-recommends \ + # Install utilities + nvtop \ + wget \ + tmux \ + vim \ + git \ + iproute2 \ + rsync \ + zip \ + unzip \ + htop \ + # Build Dependencies + autoconf \ + automake \ + cmake \ + libtool \ + meson \ + net-tools \ + pybind11-dev \ + # Rust build dependencies + clang \ + libclang-dev \ + protobuf-compiler && \ + rm -rf /var/lib/apt/lists/* + +# Set workspace directory variable +ENV WORKSPACE_DIR=${WORKSPACE_DIR} \ + DYNAMO_HOME=${WORKSPACE_DIR} \ + RUSTUP_HOME=/usr/local/rustup \ + CARGO_HOME=/usr/local/cargo \ + CARGO_TARGET_DIR=/workspace/target \ + VIRTUAL_ENV=/opt/dynamo/venv \ + PATH=/usr/local/cargo/bin:$PATH + +COPY --from=dynamo_base /usr/local/rustup /usr/local/rustup +COPY --from=dynamo_base /usr/local/cargo /usr/local/cargo + +# Install maturin, for maturin develop +RUN uv pip list +RUN uv pip install maturin[patchelf] +RUN uv pip list +# Editable install of dynamo +COPY pyproject.toml README.md hatch_build.py /workspace/ +RUN uv pip list +RUN uv pip install --no-deps -e . +RUN uv pip list +CMD [] diff --git a/container/Dockerfile.vllm b/container/Dockerfile.vllm index 3cb388c3fc..95c3d68322 100644 --- a/container/Dockerfile.vllm +++ b/container/Dockerfile.vllm @@ -3,21 +3,18 @@ # SPDX-License-Identifier: Apache-2.0 ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base" -# TODO OPS-612: NCCL will hang with 25.03, so use 25.01 for now -# Please check https://github.com/ai-dynamo/dynamo/pull/1065 -# for details and reproducer to manually test if the image -# can be updated to later versions. 
-ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04" +ARG BASE_IMAGE_TAG="25.09-cuda13.0-devel-ubuntu24.04" +ARG RELEASE_BUILD ARG ENABLE_KVBM=false ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda" -ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04" -ARG CUDA_VERSION="12.8" +ARG RUNTIME_IMAGE_TAG="13.0.1-runtime-ubuntu24.04" +ARG CUDA_VERSION="13.0" # Make sure to update the dependency version in pyproject.toml when updating this -ARG VLLM_REF="v0.11.0" +ARG VLLM_REF="v0.11.1rc2" # FlashInfer only respected when building vLLM from source, ie when VLLM_REF does not start with 'v' or for arm64 builds -ARG FLASHINF_REF="v0.3.1" -ARG TORCH_BACKEND="cu128" +ARG FLASHINF_REF="v0.4.1" +ARG TORCH_BACKEND="cu130" # If left blank, then we will fallback to vLLM defaults ARG DEEPGEMM_REF="" @@ -81,6 +78,7 @@ RUN apt-get update -y \ ibverbs-utils \ libibumad-dev \ libibverbs-dev \ + libmlx5-1 \ libnuma-dev \ librdmacm-dev \ rdma-core \ @@ -143,7 +141,13 @@ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \ export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \ cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \ chmod +x /tmp/install_vllm.sh && \ - /tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt ${DEEPGEMM_REF:+--deepgemm-ref "$DEEPGEMM_REF"} ${FLASHINF_REF:+--flashinf-ref "$FLASHINF_REF"} --torch-backend $TORCH_BACKEND --cuda-version $CUDA_VERSION && \ + /tmp/install_vllm.sh --editable \ + --vllm-ref $VLLM_REF \ + --max-jobs $MAX_JOBS \ + --arch $ARCH \ + --installation-dir /opt ${DEEPGEMM_REF:+--deepgemm-ref "$DEEPGEMM_REF"} ${FLASHINF_REF:+--flashinf-ref "$FLASHINF_REF"} \ + --torch-backend $TORCH_BACKEND \ + --cuda-version $CUDA_VERSION && \ /tmp/use-sccache.sh show-stats "vLLM"; ENV LD_LIBRARY_PATH=\ @@ -206,7 +210,7 @@ RUN apt-get update && \ # prometheus dependencies ca-certificates \ # DeepGemm uses 'cuobjdump' which does not come with CUDA image - cuda-command-line-tools-12-8 && 
\ + cuda-command-line-tools-13-0 && \ rm -rf /var/lib/apt/lists/* # Copy CUDA development tools (nvcc, headers, dependencies, etc.) from base devel image diff --git a/container/build.sh b/container/build.sh index 2bc2327cc0..4d3a1534a7 100755 --- a/container/build.sh +++ b/container/build.sh @@ -59,7 +59,7 @@ BUILD_CONTEXT=$(dirname "$(readlink -f "$SOURCE_DIR")") # Base Images TRTLLM_BASE_IMAGE=nvcr.io/nvidia/pytorch -TRTLLM_BASE_IMAGE_TAG=25.06-py3 +TRTLLM_BASE_IMAGE_TAG=25.08-py3 # Important Note: Because of ABI compatibility issues between TensorRT-LLM and NGC PyTorch, # we need to build the TensorRT-LLM wheel from source. @@ -89,16 +89,17 @@ DEFAULT_TENSORRTLLM_PIP_WHEEL_DIR="/tmp/trtllm_wheel/" # TensorRT-LLM commit to use for building the trtllm wheel if not provided. # Important Note: This commit is not used in our CI pipeline. See the CI # variables to learn how to run a pipeline with a specific commit. -DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT="0c9430e5a530ba958fc9dca561a3ad865ad9f492" +DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT="736e7ee136e0d65f98704db13ab7e053803033c4" # tag v1.2.0rc1 TRTLLM_COMMIT="" TRTLLM_USE_NIXL_KVCACHE_EXPERIMENTAL="0" TRTLLM_GIT_URL="" # TensorRT-LLM PyPI index URL -DEFAULT_TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple" +DEFAULT_TENSORRTLLM_INDEX_URL="https://download.pytorch.org/whl/cu130" # TODO: Remove the version specification from here and use the ai-dynamo[trtllm] package. # Need to update the Dockerfile.trtllm to use the ai-dynamo[trtllm] package. -DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.1.0rc5" +DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.2.0rc1" +TENSORRTLLM_INDEX_URL="" TENSORRTLLM_PIP_WHEEL="" @@ -107,13 +108,13 @@ VLLM_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base" # Please check https://github.com/ai-dynamo/dynamo/pull/1065 # for details and reproducer to manually test if the image # can be updated to later versions. 
-VLLM_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04" +VLLM_BASE_IMAGE_TAG="25.09-cuda13.0-devel-ubuntu24.04" NONE_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base" -NONE_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04" +NONE_BASE_IMAGE_TAG="25.09-cuda13.0-devel-ubuntu24.04" SGLANG_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base" -SGLANG_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04" +SGLANG_BASE_IMAGE_TAG="25.09-cuda13.0-devel-ubuntu24.04" NIXL_REF=0.7.1 NIXL_UCX_REF=v1.19.0 @@ -566,7 +567,7 @@ build_local_dev_with_header() { set -x fi - $RUN_PREFIX docker build \ + $RUN_PREFIX docker build --progress=plain \ --build-arg DEV_BASE="$dev_base_image" \ --build-arg USER_UID="$USER_UID" \ --build-arg USER_GID="$USER_GID" \ @@ -848,15 +849,22 @@ if [[ -z "${DEV_IMAGE_INPUT:-}" ]]; then echo "======================================" echo "Starting Build 1: Base Image" echo "======================================" - $RUN_PREFIX docker build -f "${SOURCE_DIR}/Dockerfile" --target dev $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO --tag $DYNAMO_BASE_IMAGE $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE + # Build 1 (container/Dockerfile) does NOT use (will be removed soon): + # - FRAMEWORK + # - VLLM_FRAMEWORK (or TRTLLM_FRAMEWORK, SGLANG_FRAMEWORK, etc.) 
+ # - VERSION + # - PYTHON_PACKAGE_VERSION + # - HF_TOKEN + # - MAX_JOBS + $RUN_PREFIX docker build --progress=plain -f "${SOURCE_DIR}/Dockerfile" --target dev $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO --tag $DYNAMO_BASE_IMAGE $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE # Start framework build echo "======================================" echo "Starting Build 2: Framework Image" echo "======================================" BUILD_ARGS+=" --build-arg DYNAMO_BASE_IMAGE=${DYNAMO_BASE_IMAGE}" - $RUN_PREFIX docker build -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE + $RUN_PREFIX docker build --progress=plain -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE else - $RUN_PREFIX docker build -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE + $RUN_PREFIX docker build --progress=plain -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE fi fi diff --git a/container/deps/requirements.standard.txt b/container/deps/requirements.standard.txt index 8c91855380..1ac1b14644 100644 --- a/container/deps/requirements.standard.txt +++ b/container/deps/requirements.standard.txt @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -ucx-py-cu12 +ucx-py-cu13 diff --git a/container/deps/requirements.txt b/container/deps/requirements.txt index f519852b45..9573373107 100644 --- a/container/deps/requirements.txt +++ b/container/deps/requirements.txt @@ -1,10 +1,10 @@ # SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 - -accelerate==1.6.0 -aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@5554d2eb8206738c66048bf2d72183e9bcd85759 +--extra-index-url https://download.pytorch.org/whl/cu130 # this is only needed for accelerate dependencies +accelerate +# aiconfigurator # @ git+https://github.com/ai-dynamo/aiconfigurator.git@5554d2eb8206738c66048bf2d72183e9bcd85759 aiofiles -aiperf @ git+https://github.com/ai-dynamo/aiperf.git@4d3fa29403c8f75da22a14f1f7b3aeb27db9288f +# aiperf # @ git+https://github.com/ai-dynamo/aiperf.git@4d3fa29403c8f75da22a14f1f7b3aeb27db9288f av==15.0.0 fastapi==0.120.1 ftfy @@ -17,7 +17,7 @@ kubernetes_asyncio matplotlib msgspec mypy -nvidia-ml-py==13.580.65 +nvidia-ml-py==13.580.82 opentelemetry-api opentelemetry-sdk pip @@ -33,8 +33,8 @@ PyYAML scikit-learn scipy<1.14.0 # Pin scipy version for pmdarima compatibility sentencepiece -tensorboard==2.19.0 -tensorboardX==2.6.2.2 +tensorboard==2.20.0 +tensorboardX==2.6.4 transformers types-aiofiles types-PyYAML diff --git a/container/deps/trtllm/install_nixl.sh b/container/deps/trtllm/install_nixl.sh index 4bc31d2e3d..d76ca1f548 100755 --- a/container/deps/trtllm/install_nixl.sh +++ b/container/deps/trtllm/install_nixl.sh @@ -23,11 +23,11 @@ set -ex GITHUB_URL="https://github.com" -UCX_VERSION="v1.18.1" +UCX_VERSION="v1.19.0" # suggested by Ovidiu Mara UCX_INSTALL_PATH="/usr/local/ucx/" CUDA_PATH="/usr/local/cuda" -NIXL_COMMIT="16348080f5bdeb9fe6058a23be140cec020ef3f3" +NIXL_COMMIT="9ada51f154cc3bedcf94b3a3fcdea6e9b4117284" # suggested by Ovidiu Mara UCX_REPO="https://github.com/openucx/ucx.git" NIXL_REPO="https://github.com/ai-dynamo/nixl.git" diff --git a/container/deps/vllm/install_vllm.sh b/container/deps/vllm/install_vllm.sh index 0ebbb58823..3dce9eaeef 100755 --- a/container/deps/vllm/install_vllm.sh +++ b/container/deps/vllm/install_vllm.sh @@ -13,7 +13,7 @@ set -euo pipefail -VLLM_REF="v0.11.0" +VLLM_REF="v0.11.1rc2" # Basic Configurations ARCH=$(uname
-m) @@ -21,10 +21,10 @@ MAX_JOBS=16 INSTALLATION_DIR=/tmp # VLLM and Dependency Configurations -TORCH_BACKEND="cu128" +TORCH_BACKEND="cu130" TORCH_CUDA_ARCH_LIST="9.0;10.0" # For EP Kernels DEEPGEMM_REF="" -CUDA_VERSION="12.8" # For DEEPGEMM +CUDA_VERSION="13.0" # For DEEPGEMM # These flags are applicable when installing vLLM from source code EDITABLE=true @@ -146,6 +146,7 @@ else # VLLM_REF does not start with 'v' or amd64 - use git checkout path if [ "$ARCH" = "arm64" ]; then + # TODO: update comments for torch 2.9.0 # torch 2.8.0 doesn't have a aarch wheel for cu128, vLLM uses torch 2.8.0 nightly wheel builds to compile its aarch wheel against # nightly can be unstable so we will not use it here # for now we will use torch 2.7.1+cu128 but this requires a recompilation from source @@ -154,7 +155,7 @@ else # Try to install specific PyTorch version first echo "Attempting to install pinned PyTorch nightly versions..." - if ! uv pip install torch==2.7.1+cu128 torchaudio==2.7.1 torchvision==0.22.1 --index-url https://download.pytorch.org/whl/cu128; then + if ! uv pip install torch==2.9.0+cu130 torchaudio==2.9.0+cu130 torchvision==0.24.0+cu130 --index-url https://download.pytorch.org/whl/cu130; then echo "Pinned versions failed" exit 1 fi