diff --git a/components/src/dynamo/trtllm/main.py b/components/src/dynamo/trtllm/main.py
index 3a8ec5c6bf..fec4052bed 100644
--- a/components/src/dynamo/trtllm/main.py
+++ b/components/src/dynamo/trtllm/main.py
@@ -183,7 +183,6 @@ async def init(runtime: DistributedRuntime, config: Config):
         "pipeline_parallel_size": config.pipeline_parallel_size,
         "moe_expert_parallel_size": config.expert_parallel_size,
         "backend": "pytorch",
-        "skip_tokenizer_init": True,
         "build_config": build_config,
         "kv_cache_config": kv_cache_config,
         "gpus_per_node": gpus_per_node,
@@ -240,8 +239,6 @@ async def init(runtime: DistributedRuntime, config: Config):
     # Populate default sampling params from the model
     tokenizer = tokenizer_factory(arg_map["model"])
     default_sampling_params = SamplingParams()
-    default_sampling_params._setup(tokenizer)
-    default_sampling_params.stop = None
     model_input = ModelInput.Tokens
 
     # Set model type based on disaggregation mode for unified frontend support
diff --git a/container/Dockerfile b/container/Dockerfile
index f91d7720bf..91af71a4e7 100644
--- a/container/Dockerfile
+++ b/container/Dockerfile
@@ -12,7 +12,7 @@ ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
 # Please check https://github.com/ai-dynamo/dynamo/pull/1065
 # for details and reproducer to manually test if the image
 # can be updated to later versions.
-ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
+ARG BASE_IMAGE_TAG="25.09-cuda13.0-devel-ubuntu24.04"
 
 # Build configuration
 ARG ENABLE_KVBM=false
@@ -53,7 +53,7 @@ FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base
 # Redeclare ARGs for this stage
 ARG ARCH
 ARG ARCH_ALT
-ARG PYTHON_VERSION
+ARG PYTHON_VERSION=3.12
 ARG USE_SCCACHE
 ARG SCCACHE_BUCKET
 ARG SCCACHE_REGION
@@ -410,6 +410,8 @@ COPY --chown=dynamo: --from=wheel_builder $CARGO_HOME $CARGO_HOME
 
 COPY --chown=dynamo: ./ /workspace/
 
+# Install Python packages
+# Install dynamo, NIXL, and dynamo-specific dependencies
 RUN uv pip install \
     /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
     /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
diff --git a/container/Dockerfile.trtllm b/container/Dockerfile.trtllm
index 9768b402cb..beb2675719 100644
--- a/container/Dockerfile.trtllm
+++ b/container/Dockerfile.trtllm
@@ -1,20 +1,14 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
-ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
-ARG BASE_IMAGE_TAG="25.06-cuda12.9-devel-ubuntu24.04"
-
-ARG PYTORCH_BASE_IMAGE="nvcr.io/nvidia/pytorch"
-ARG PYTORCH_BASE_IMAGE_TAG="25.06-py3"
+ARG BASE_IMAGE="nvcr.io/nvidia/pytorch"
+ARG BASE_IMAGE_TAG="25.08-py3" # TODO: test on 25.09-py3
+ARG RELEASE_BUILD
 ARG ENABLE_KVBM=false
-ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
-ARG RUNTIME_IMAGE_TAG="12.9.1-runtime-ubuntu24.04"
-
-# TensorRT-LLM specific configuration
-ARG HAS_TRTLLM_CONTEXT=0
-ARG TENSORRTLLM_PIP_WHEEL="tensorrt-llm"
-ARG TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple"
-ARG GITHUB_TRTLLM_COMMIT
+# ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
+# ARG RUNTIME_IMAGE_TAG="13.0.0-runtime-ubuntu24.04" # TODO: test on 13.0.1
+ARG RUNTIME_IMAGE="nvcr.io/nvidia/tensorrt-llm/release"
+ARG RUNTIME_IMAGE_TAG="1.2.0rc1" # TODO: check for 1.2.0rc2
 
 # Define general architecture ARGs for supporting both x86 and aarch64 builds.
 #   ARCH: Used for package suffixes (e.g., amd64, arm64)
@@ -37,126 +31,9 @@ ARG DYNAMO_BASE_IMAGE="dynamo:latest-none"
 FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base
 
 # Copy artifacts from NGC PyTorch image
-FROM ${PYTORCH_BASE_IMAGE}:${PYTORCH_BASE_IMAGE_TAG} AS pytorch_base
-
-##################################################
-########## Framework Builder Stage ##############
-##################################################
-#
-# PURPOSE: Build TensorRT-LLM with root privileges
-#
-# This stage handles TensorRT-LLM installation which requires:
-# - Root access for apt operations (CUDA repos, TensorRT installation)
-# - System-level modifications in install_tensorrt.sh
-# - Virtual environment population with PyTorch and TensorRT-LLM
-#
-# The completed venv is then copied to runtime stage with dynamo ownership
-
 FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS framework
 
-ARG ARCH_ALT
-ARG PYTHON_VERSION
-ARG ENABLE_KVBM
-ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
-ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
-ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
-ENV VIRTUAL_ENV=/opt/dynamo/venv
-ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
-
-# Install minimal dependencies needed for TensorRT-LLM installation
-RUN apt-get update && \
-    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-        python${PYTHON_VERSION}-dev \
-        python3-pip \
-        curl \
-        git \
-        git-lfs \
-        ca-certificates && \
-    rm -rf /var/lib/apt/lists/*
-
-# Copy uv
-COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
-
-# Create virtual environment
-RUN mkdir -p /opt/dynamo/venv && \
-    uv venv /opt/dynamo/venv --python $PYTHON_VERSION
-
-# Copy pytorch installation from NGC PyTorch
-ARG TORCH_VER=2.8.0a0+5228986c39.nv25.6
-ARG TORCHVISION_VER=0.22.0a0+95f10a4e
-ARG SETUPTOOLS_VER=78.1.1
-ARG PYTORCH_TRITON_VER=3.3.0+git96316ce52.nvinternal
-ARG JINJA2_VER=3.1.6
-ARG NETWORKX_VER=3.5
-ARG SYMPY_VER=1.14.0
-ARG PACKAGING_VER=23.2
-ARG FLASH_ATTN_VER=2.7.4.post1
-ARG MPMATH_VER=1.3.0
-
-COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torch
-COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch-${TORCH_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torch-${TORCH_VER}.dist-info
-COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchgen ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchgen
-COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision
-COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision-${TORCHVISION_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision-${TORCHVISION_VER}.dist-info
-COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision.libs ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision.libs
-COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/functorch ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/functorch
-COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/jinja2 ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/jinja2
-COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/jinja2-${JINJA2_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/jinja2-${JINJA2_VER}.dist-info
-COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/sympy ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/sympy
-COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/sympy-${SYMPY_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/sympy-${SYMPY_VER}.dist-info
-COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/flash_attn
-COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/flash_attn-${FLASH_ATTN_VER}.dist-info
-COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn_2_cuda.cpython-*-*-linux-gnu.so ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/
-COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/triton ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/triton
-COPY --from=pytorch_base /usr/local/lib/python${PYTHON_VERSION}/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info
-
-# Install TensorRT-LLM and related dependencies
-ARG HAS_TRTLLM_CONTEXT
-ARG TENSORRTLLM_PIP_WHEEL
-ARG TENSORRTLLM_INDEX_URL
-ARG GITHUB_TRTLLM_COMMIT
-
-# Copy only wheel files and commit info from trtllm_wheel stage from build_context
-COPY --from=trtllm_wheel /*.whl /trtllm_wheel/
-COPY --from=trtllm_wheel /*.txt /trtllm_wheel/
-
-# NOTE: locking cuda-python version to <13 to avoid breaks with tensorrt-llm 1.0.0rc6.
-RUN uv pip install "cuda-python>=12,<13"
-
-# Note: TensorRT needs to be uninstalled before installing the TRTLLM wheel
-# because there might be mismatched versions of TensorRT between the NGC PyTorch
-# and the TRTLLM wheel.
-RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
-    # Clean up any existing conflicting CUDA repository configurations and GPG keys
-    rm -f /etc/apt/sources.list.d/cuda*.list && \
-    rm -f /usr/share/keyrings/cuda-archive-keyring.gpg && \
-    rm -f /etc/apt/trusted.gpg.d/cuda*.gpg
 
-RUN if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
-        # Download and run install_tensorrt.sh from TensorRT-LLM GitHub before installing the wheel
-        curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/${GITHUB_TRTLLM_COMMIT}/docker/common/install_tensorrt.sh" && \
-        # Modify the script to use virtual environment pip instead of system pip3
-        sed -i 's/pip3 install/uv pip install/g' /tmp/install_tensorrt.sh && \
-        bash /tmp/install_tensorrt.sh && \
-        # Install from local wheel directory in build context
-        WHEEL_FILE="$(find /trtllm_wheel -name "*.whl" | head -n 1)"; \
-        if [ -n "$WHEEL_FILE" ]; then \
-            uv pip install "$WHEEL_FILE"; \
-        else \
-            echo "No wheel file found in /trtllm_wheel directory."; \
-            exit 1; \
-        fi; \
-    else \
-        # Download and run install_tensorrt.sh from TensorRT-LLM GitHub before installing the wheel
-        TRTLLM_VERSION=$(echo "${TENSORRTLLM_PIP_WHEEL}" | sed -n 's/.*==\([0-9a-zA-Z\.\-]*\).*/\1/p') && \
-        (curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/v${TRTLLM_VERSION}/docker/common/install_tensorrt.sh" || \
-         curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/${GITHUB_TRTLLM_COMMIT}/docker/common/install_tensorrt.sh") && \
-        # Modify the script to use virtual environment pip instead of system pip3
-        sed -i 's/pip3 install/uv pip install/g' /tmp/install_tensorrt.sh && \
-        bash /tmp/install_tensorrt.sh && \
-        # Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI
-        uv pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}"; \
-    fi
 
 ##################################################
 ########## Runtime Image ########################
@@ -178,40 +55,21 @@ RUN if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
 #
 
 FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime
-
-ARG ARCH_ALT
-ARG ENABLE_KVBM
-ARG PYTHON_VERSION
-
+ARG PYTHON_VERSION=3.12
 WORKDIR /workspace
+# ENV VIRTUAL_ENV=/opt/dynamo/venv
 
-ENV ENV=${ENV:-/etc/shinit_v2}
-ENV VIRTUAL_ENV=/opt/dynamo/venv
+# TODO: test this - pointing VIRTUAL_ENV at the system dist-packages directory will probably not work
+# ENV VIRTUAL_ENV=/usr/local/lib/python3.12/dist-packages
+
+ARG ARCH_ALT
 ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
 ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
 ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
 
-ARG DYNAMO_COMMIT_SHA
-ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
-
 # Install Python, build-essential and python3-dev as apt dependencies
 RUN apt-get update && \
     DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-        # Build tools
-        build-essential \
-        g++ \
-        ninja-build \
-        git \
-        git-lfs \
-        # Python runtime - CRITICAL for virtual environment to work
-        python${PYTHON_VERSION}-dev \
-        python3-pip \
-        # jq for polling various endpoints and health checks
-        jq \
-        # CUDA/ML libraries
-        libcudnn9-cuda-12 \
-        # Network and communication libraries
-        libzmq3-dev \
         # RDMA/UCX libraries required to find RDMA devices
         ibverbs-providers \
         ibverbs-utils \
@@ -222,70 +80,25 @@ RUN apt-get update && \
         rdma-core \
         # OpenMPI dependencies
         openssh-client \
-        openssh-server \
-        # System utilities and dependencies
-        curl && \
-    apt-get clean && \
+        openssh-server && \
     rm -rf /var/lib/apt/lists/*
 
-# Copy CUDA development tools (nvcc, headers, dependencies, etc.) from PyTorch base image
-COPY --from=pytorch_base /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc
-COPY --from=pytorch_base /usr/local/cuda/bin/cudafe++ /usr/local/cuda/bin/cudafe++
-COPY --from=pytorch_base /usr/local/cuda/bin/ptxas /usr/local/cuda/bin/ptxas
-COPY --from=pytorch_base /usr/local/cuda/bin/fatbinary /usr/local/cuda/bin/fatbinary
-COPY --from=pytorch_base /usr/local/cuda/include/ /usr/local/cuda/include/
-COPY --from=pytorch_base /usr/local/cuda/nvvm /usr/local/cuda/nvvm
-COPY --from=pytorch_base /usr/local/cuda/lib64/libcudart.so* /usr/local/cuda/lib64/
-COPY --from=pytorch_base /usr/local/cuda/lib64/libcupti* /usr/local/cuda/lib64/
-COPY --from=pytorch_base /usr/local/lib/lib* /usr/local/lib/
-
+### COPY NATS & ETCD ###
 # Copy nats and etcd from dynamo_base image
 COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
-COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
+# etcd is already in the base trtllm image
+# COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
 # Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible
 ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
 
-# Copy OpenMPI from PyTorch base image
-COPY --from=pytorch_base /opt/hpcx/ompi /opt/hpcx/ompi
-# Copy NUMA library from PyTorch base image
-COPY --from=pytorch_base /usr/lib/${ARCH_ALT}-linux-gnu/libnuma.so* /usr/lib/${ARCH_ALT}-linux-gnu/
-
-# Copy UCX libraries, libucc.so is needed by pytorch. May not need to copy whole hpcx dir but only /opt/hpcx/ucc/
-COPY --from=pytorch_base /opt/hpcx /opt/hpcx
-# This is needed to make libucc.so visible so pytorch can use it.
-ENV LD_LIBRARY_PATH="/opt/hpcx/ucc/lib:${LD_LIBRARY_PATH}"
-# Might not need to copy cusparseLt in the future once it's included in DLFW cuda container
-# networkx, packaging, setuptools get overridden by trtllm installation, so not copying them
-# pytorch-triton is copied after trtllm installation.
-COPY --from=pytorch_base /usr/local/cuda/lib64/libcusparseLt* /usr/local/cuda/lib64/
-
-# Copy uv to system /bin
-COPY --from=framework /bin/uv /bin/uvx /bin/
-
-# Copy libgomp.so from framework image
-COPY --from=framework /usr/local/tensorrt /usr/local/tensorrt
-COPY --from=framework /usr/lib/${ARCH_ALT}-linux-gnu/libgomp.so* /usr/lib/${ARCH_ALT}-linux-gnu/
-
-# Create dynamo user with group 0 for OpenShift compatibility
-RUN userdel -r ubuntu > /dev/null 2>&1 || true \
-    && useradd -m -s /bin/bash -g 0 dynamo \
-    && [ `id -u dynamo` -eq 1000 ] \
-    && mkdir -p /home/dynamo/.cache /opt/dynamo \
-    && chown -R dynamo: /workspace /home/dynamo /opt/dynamo \
-    && chmod -R g+w /workspace /home/dynamo/.cache /opt/dynamo
-
-# Switch to dynamo user
-USER dynamo
-ENV HOME=/home/dynamo
-ENV DYNAMO_HOME=/workspace
-
-# Copy UCX from framework image as plugin for NIXL
-# Copy NIXL source from framework image
+# Copy UCX from dynamo_base image as plugin for NIXL
+# Copy NIXL source from dynamo_base image
 # Copy dynamo wheels for gitlab artifacts
-COPY --chown=dynamo: --from=dynamo_base /usr/local/ucx /usr/local/ucx
-COPY --chown=dynamo: --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX
+COPY --from=dynamo_base /usr/local/ucx /usr/local/ucx
+COPY --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX
+ENV PATH=/usr/local/ucx/bin:$PATH
 
-ENV PATH="/usr/local/ucx/bin:${VIRTUAL_ENV}/bin:/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/bin:/usr/local/cuda/nvvm/bin:$PATH"
+ENV DYNAMO_HOME=/workspace
 ENV LD_LIBRARY_PATH=\
 $NIXL_LIB_DIR:\
 $NIXL_PLUGIN_DIR:\
@@ -293,129 +106,74 @@ $NIXL_PLUGIN_DIR:\
 /usr/local/ucx/lib/ucx:\
 /opt/hpcx/ompi/lib:\
 $LD_LIBRARY_PATH
+# ENV PATH="${VIRTUAL_ENV}/bin:/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/bin:/usr/local/cuda/nvvm/bin:$PATH"
 ENV OPAL_PREFIX=/opt/hpcx/ompi
 
-# Copy pre-built venv with PyTorch and TensorRT-LLM from framework stage
-COPY --chown=dynamo: --from=framework ${VIRTUAL_ENV} ${VIRTUAL_ENV}
-
-ENV TENSORRT_LIB_DIR=/usr/local/tensorrt/targets/${ARCH_ALT}-linux-gnu/lib
-ENV LD_LIBRARY_PATH=${TENSORRT_LIB_DIR}:${LD_LIBRARY_PATH}
-
-# Install dynamo, NIXL, and dynamo-specific dependencies
-COPY --chown=dynamo: benchmarks/ /opt/dynamo/benchmarks/
-COPY --chown=dynamo: --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
-RUN uv pip install \
-      /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
-      /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
-      /opt/dynamo/wheelhouse/nixl/nixl*.whl \
-    && if [ "${ENABLE_KVBM}" = "true" ]; then \
-        uv pip install /opt/dynamo/wheelhouse/kvbm*.whl; \
-       fi \
-    && cd /opt/dynamo/benchmarks \
-    && UV_GIT_LFS=1 uv pip install --no-cache . \
-    && cd - \
-    && rm -rf /opt/dynamo/benchmarks
-
-# Install common and test dependencies
-RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
-    --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
-    UV_GIT_LFS=1 uv pip install \
-        --no-cache \
-        --requirement /tmp/requirements.txt \
-        --requirement /tmp/requirements.test.txt
-
-# Copy tests, benchmarks, deploy and components for CI with correct ownership
-COPY --chown=dynamo: tests /workspace/tests
-COPY --chown=dynamo: examples /workspace/examples
-COPY --chown=dynamo: benchmarks /workspace/benchmarks
-COPY --chown=dynamo: deploy /workspace/deploy
-COPY --chown=dynamo: components/ /workspace/components/
-COPY --chown=dynamo: recipes/ /workspace/recipes/
-
-# Copy attribution files with correct ownership
-COPY --chown=dynamo: ATTRIBUTION* LICENSE /workspace/
+### VIRTUAL ENVIRONMENT SETUP ###
 
-# Setup launch banner in common directory accessible to all users
-RUN --mount=type=bind,source=./container/launch_message/runtime.txt,target=/opt/dynamo/launch_message.txt \
-    sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen
-
-# Setup environment for all users
-USER root
-RUN chmod 755 /opt/dynamo/.launch_screen && \
-    echo 'source /opt/dynamo/venv/bin/activate' >> /etc/bash.bashrc && \
-    echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc
-
-USER dynamo
-
-ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
-CMD []
-
-###########################################################
-########## Development (run.sh, runs as root user) ########
-###########################################################
-#
-# PURPOSE: Local development environment for use with run.sh (not Dev Container plug-in)
-#
-# This stage runs as root and provides:
-# - Development tools and utilities for local debugging
-# - Support for vscode/cursor development outside the Dev Container plug-in
-#
-# Use this stage if you need a full-featured development environment with extra tools,
-# but do not use it with the Dev Container plug-in.
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
+# Create virtual environment
+# RUN mkdir -p /opt/dynamo/venv && \
+#     uv venv /opt/dynamo/venv --python $PYTHON_VERSION
 
-FROM runtime AS dev
+# Activate virtual environment
+# ENV VIRTUAL_ENV=/opt/dynamo/venv \
+#     PATH="/opt/dynamo/venv/bin:${PATH}"
 
-# Don't want ubuntu to be editable, just change uid and gid.
-ARG WORKSPACE_DIR=/workspace
+ENV ENV=${ENV:-/etc/shinit_v2}
 
-# Switch to root for system package installation
-USER root
 
-# Install utilities as root
-RUN apt-get update -y && \
-    apt-get install -y --no-install-recommends  \
-    # Install utilities
-    nvtop \
-    wget \
-    tmux \
-    vim \
-    git \
-    iproute2 \
-    rsync \
-    zip \
-    unzip \
-    htop \
-    # Build Dependencies
-    autoconf \
-    automake \
-    cmake \
-    libtool \
-    meson \
-    net-tools \
-    pybind11-dev \
-    # Rust build dependencies
-    clang \
-    libclang-dev \
-    protobuf-compiler && \
-    rm -rf /var/lib/apt/lists/*
+ENV TENSORRT_LIB_DIR=/usr/local/tensorrt/targets/${ARCH_ALT}-linux-gnu/lib
+ENV LD_LIBRARY_PATH=${TENSORRT_LIB_DIR}:${LD_LIBRARY_PATH}
 
-# Set workspace directory variable
-ENV WORKSPACE_DIR=${WORKSPACE_DIR} \
-    DYNAMO_HOME=${WORKSPACE_DIR} \
-    RUSTUP_HOME=/usr/local/rustup \
-    CARGO_HOME=/usr/local/cargo \
-    CARGO_TARGET_DIR=/workspace/target \
-    VIRTUAL_ENV=/opt/dynamo/venv \
-    PATH=/usr/local/cargo/bin:$PATH
+# Install dynamo, NIXL, and dynamo-specific dependencies
+COPY benchmarks/ /opt/dynamo/benchmarks/
+COPY --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
+RUN cd /opt/dynamo/wheelhouse/ && \
+    ls -lah  && \
+    ls -lah nixl/ && \
+    pip list &&\
+    # TRTLLM's NIXL installation doesn't include the NIXL Python package, so install it from the wheelhouse
+    pip install --break-system-packages --no-cache \
+        ai_dynamo_runtime-*.whl \
+        ai_dynamo-*.whl \
+        nixl/nixl-*.whl &&\
+    pip list
+    # TODO: install benchmarks
+    # && cd /opt/dynamo/benchmarks \
+    # && UV_GIT_LFS=1 uv pip install --no-cache --system --break-system-packages . \
+    # && uv pip list \
+    # && cd - \
+    # && rm -rf /opt/dynamo/benchmarks
+
+# TODO: Install common and test dependencies
+# RUN uv pip list
+# RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
+#     --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
+#     UV_GIT_LFS=1 uv pip install \
+#         --system --break-system-packages \
+#         --no-cache \
+#         --requirement /tmp/requirements.txt \
+#         --requirement /tmp/requirements.test.txt
+# RUN uv pip list
+# This is needed to make libucc.so visible so pytorch can use it.
+ENV LD_LIBRARY_PATH="/opt/hpcx/ucc/lib:${LD_LIBRARY_PATH}"
 
-COPY --from=dynamo_base /usr/local/rustup /usr/local/rustup
-COPY --from=dynamo_base /usr/local/cargo /usr/local/cargo
+# Copy tests, examples, benchmarks, deploy and components for CI
+COPY tests /workspace/tests
+COPY examples /workspace/examples
+COPY benchmarks /workspace/benchmarks
+COPY deploy /workspace/deploy
+COPY components/ /workspace/components/
 
-# Install maturin, for maturin develop
-RUN uv pip install maturin[patchelf]
+# Copy attribution files
+COPY ATTRIBUTION* LICENSE /workspace/
+# Copy launch banner
+RUN --mount=type=bind,source=./container/launch_message/runtime.txt,target=/workspace/launch_message.txt \
+    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
+    echo "cat ~/.launch_screen" >> ~/.bashrc
+    # echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc
 
-# Editable install of dynamo
-COPY pyproject.toml README.md hatch_build.py /workspace/
-RUN uv pip install --no-deps -e .
 
-CMD []
+ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
+CMD []
\ No newline at end of file
diff --git a/container/Dockerfile.trtllm-cuda12 b/container/Dockerfile.trtllm-cuda12
new file mode 100644
index 0000000000..23ddda454a
--- /dev/null
+++ b/container/Dockerfile.trtllm-cuda12
@@ -0,0 +1,364 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+ARG BASE_IMAGE="nvcr.io/nvidia/pytorch"
+ARG BASE_IMAGE_TAG="25.08-py3" # TODO: test on 25.09-py3
+ARG RELEASE_BUILD
+ARG ENABLE_KVBM=false
+ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
+ARG RUNTIME_IMAGE_TAG="13.0.0-runtime-ubuntu24.04" # TODO: test on 13.0.1
+
+# TensorRT-LLM specific configuration
+ARG HAS_TRTLLM_CONTEXT=0
+ARG TENSORRTLLM_PIP_WHEEL="tensorrt-llm"
+ARG TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple"
+ARG GITHUB_TRTLLM_COMMIT
+
+# Define general architecture ARGs for supporting both x86 and aarch64 builds.
+#   ARCH: Used for package suffixes (e.g., amd64, arm64)
+#   ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64)
+#
+# Default values are for x86/amd64:
+#   --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64
+#
+# For arm64/aarch64, build with:
+#   --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
+#
+# NOTE: There isn't an easy way to define one of these values based on the other value
+# without adding if statements everywhere, so just define both as ARGs for now.
+ARG ARCH=amd64
+ARG ARCH_ALT=x86_64
+# Python configuration
+ARG PYTHON_VERSION=3.12
+
+ARG DYNAMO_BASE_IMAGE="dynamo:latest-none"
+FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base
+
+# Copy artifacts from NGC PyTorch image
+FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS framework
+
+
+
+##################################################
+########## Runtime Image ########################
+##################################################
+#
+# PURPOSE: Production runtime environment
+#
+# This stage creates a lightweight production-ready image containing:
+# - Pre-compiled TensorRT-LLM and framework dependencies
+# - Dynamo runtime libraries and Python packages
+# - Essential runtime dependencies and configurations
+# - Optimized for inference workloads and deployment
+#
+# Use this stage when you need:
+# - Production deployment of Dynamo with TensorRT-LLM
+# - Minimal runtime footprint without build tools
+# - Ready-to-run inference server environment
+# - Base for custom application containers
+#
+
+FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime
+
+WORKDIR /workspace
+ENV VIRTUAL_ENV=/opt/dynamo/venv
+
+ARG ARCH_ALT
+ARG PYTHON_VERSION
+ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
+ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
+ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
+
+# Install Python, build-essential and python3-dev as apt dependencies
+RUN apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+        # Build tools (required for JIT kernel compilation)
+        build-essential \
+        g++ \
+        ninja-build \
+        git \
+        git-lfs \
+        # Python runtime - CRITICAL for virtual environment to work
+        python${PYTHON_VERSION}-dev \
+        python3-pip \
+        # CUDA/ML libraries
+        libcudnn9-cuda-13 \
+        # Network and communication libraries
+        libzmq3-dev \
+        # RDMA/UCX libraries required to find RDMA devices
+        ibverbs-providers \
+        ibverbs-utils \
+        libibumad3 \
+        libibverbs1 \
+        libnuma1 \
+        librdmacm1 \
+        rdma-core \
+        # OpenMPI dependencies
+        openssh-client \
+        openssh-server \
+        # System utilities
+        ca-certificates \
+        curl \
+        jq \
+        wget && \
+    rm -rf /var/lib/apt/lists/*
+
+# Copy CUDA development tools (nvcc, headers, dependencies, etc.) from framework devel image
+COPY --from=framework /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc
+COPY --from=framework /usr/local/cuda/bin/cudafe++ /usr/local/cuda/bin/cudafe++
+COPY --from=framework /usr/local/cuda/bin/ptxas /usr/local/cuda/bin/ptxas
+COPY --from=framework /usr/local/cuda/bin/fatbinary /usr/local/cuda/bin/fatbinary
+COPY --from=framework /usr/local/cuda/include/ /usr/local/cuda/include/
+COPY --from=framework /usr/local/cuda/nvvm /usr/local/cuda/nvvm
+COPY --from=framework /usr/local/cuda/lib64/libcudart.so* /usr/local/cuda/lib64/
+COPY --from=framework /usr/local/cuda/lib64/libcupti* /usr/local/cuda/lib64/
+COPY --from=framework /usr/local/lib/lib* /usr/local/lib/
+
+### COPY NATS & ETCD ###
+# Copy nats and etcd from dynamo_base image
+COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
+COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
+# Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible
+ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH
+
+# Copy UCX from dynamo_base image as plugin for NIXL
+# Copy NIXL source from dynamo_base image
+# Copy dynamo wheels for gitlab artifacts
+COPY --from=dynamo_base /usr/local/ucx /usr/local/ucx
+COPY --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX
+ENV PATH=/usr/local/ucx/bin:$PATH
+
+# Copy OpenMPI from framework image
+COPY --from=framework /opt/hpcx/ompi /opt/hpcx/ompi
+# Copy NUMA library from framework image
+COPY --from=framework /usr/lib/${ARCH_ALT}-linux-gnu/libnuma.so* /usr/lib/${ARCH_ALT}-linux-gnu/
+
+ENV DYNAMO_HOME=/workspace
+ENV LD_LIBRARY_PATH=\
+$NIXL_LIB_DIR:\
+$NIXL_PLUGIN_DIR:\
+/usr/local/ucx/lib:\
+/usr/local/ucx/lib/ucx:\
+/opt/hpcx/ompi/lib:\
+$LD_LIBRARY_PATH
+ENV PATH="${VIRTUAL_ENV}/bin:/opt/hpcx/ompi/bin:/usr/local/bin/etcd/:/usr/local/cuda/bin:/usr/local/cuda/nvvm/bin:$PATH"
+ENV OPAL_PREFIX=/opt/hpcx/ompi
+
+### VIRTUAL ENVIRONMENT SETUP ###
+
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
+# Create virtual environment
+RUN mkdir -p /opt/dynamo/venv && \
+    uv venv /opt/dynamo/venv --python $PYTHON_VERSION
+
+# Activate virtual environment
+ENV VIRTUAL_ENV=/opt/dynamo/venv \
+    PATH="/opt/dynamo/venv/bin:${PATH}"
+
+# Copy pytorch installation from NGC PyTorch
+ARG TORCH_VER=2.8.0a0+34c6371d24.nv25.8
+ARG TORCHVISION_VER=0.23.0a0+428a54c9
+ARG SETUPTOOLS_VER=78.1.1
+ARG PYTORCH_TRITON_VER=3.3.1+gitc8757738
+ARG JINJA2_VER=3.1.6
+ARG NETWORKX_VER=3.5
+ARG SYMPY_VER=1.14.0
+ARG PACKAGING_VER=23.2
+ARG FLASH_ATTN_VER=2.7.4.post1
+ARG MPMATH_VER=1.3.0
+
+COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torch
+COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torch-${TORCH_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torch-${TORCH_VER}.dist-info
+COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchgen ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchgen
+COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision
+COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision-${TORCHVISION_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision-${TORCHVISION_VER}.dist-info
+COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/torchvision.libs ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/torchvision.libs
+COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/functorch ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/functorch
+COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/jinja2 ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/jinja2
+COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/jinja2-${JINJA2_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/jinja2-${JINJA2_VER}.dist-info
+COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/sympy ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/sympy
+COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/sympy-${SYMPY_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/sympy-${SYMPY_VER}.dist-info
+COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/flash_attn
+COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/flash_attn-${FLASH_ATTN_VER}.dist-info
+COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/flash_attn_2_cuda.cpython-*-*-linux-gnu.so ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/
+COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/triton ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/triton
+COPY --from=framework /usr/local/lib/python${PYTHON_VERSION}/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info
+
+
+ENV ENV=${ENV:-/etc/shinit_v2}
+
+# Install TensorRT-LLM and related dependencies
+ARG HAS_TRTLLM_CONTEXT
+ARG TENSORRTLLM_PIP_WHEEL
+ARG TENSORRTLLM_INDEX_URL
+
+# Copy only wheel files and commit info from trtllm_wheel stage from build_context
+COPY --from=trtllm_wheel /*.whl /trtllm_wheel/
+COPY --from=trtllm_wheel /*.txt /trtllm_wheel/
+
+# Note: TensorRT needs to be uninstalled before installing the TRTLLM wheel because there might be
+# mismatched versions of TensorRT between the NGC PyTorch and the TRTLLM wheel.
+# Every step is chained with && so a failed download or install fails the build instead of being masked.
+RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
+    # Clean up any existing conflicting CUDA repository configurations and GPG keys
+    rm -f /etc/apt/sources.list.d/cuda*.list && \
+    rm -f /usr/share/keyrings/cuda-archive-keyring.gpg && \
+    rm -f /etc/apt/trusted.gpg.d/cuda*.gpg && \
+    if [ "$HAS_TRTLLM_CONTEXT" = "1" ]; then \
+        # Fetch install_tensorrt.sh for the wheel's commit (fallback: GITHUB_TRTLLM_COMMIT, assumed declared earlier in this stage) and run it before installing the wheel
+        TRTLLM_COMMIT=$(awk -F'_' '{print $2}' /trtllm_wheel/commit.txt) && \
+        echo "Using TRTLLM_COMMIT: $TRTLLM_COMMIT" && \
+        (curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/${TRTLLM_COMMIT}/docker/common/install_tensorrt.sh" || \
+         curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/${GITHUB_TRTLLM_COMMIT}/docker/common/install_tensorrt.sh") && \
+        # Modify the script to use virtual environment pip instead of system pip3
+        sed -i 's/pip3 install/uv pip install/g' /tmp/install_tensorrt.sh && \
+        cat /tmp/install_tensorrt.sh && \
+        uv pip install patchelf===0.18.0 && \
+        bash /tmp/install_tensorrt.sh && \
+        # Install from local wheel directory in build context (kept in the && chain so earlier failures abort the build)
+        WHEEL_FILE=$(find /trtllm_wheel -name "*.whl" | head -n 1) && \
+        if [ -n "$WHEEL_FILE" ]; then \
+            uv pip install "$WHEEL_FILE"; \
+        else \
+            echo "No wheel file found in /trtllm_wheel directory."; \
+            exit 1; \
+        fi; \
+    else \
+        # Fetch install_tensorrt.sh for the release tag parsed from TENSORRTLLM_PIP_WHEEL (fallback: GITHUB_TRTLLM_COMMIT) and run it before installing the wheel
+        TRTLLM_VERSION=$(echo "${TENSORRTLLM_PIP_WHEEL}" | sed -n 's/.*==\([0-9a-zA-Z\.\-]*\).*/\1/p') && \
+        (curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/v${TRTLLM_VERSION}/docker/common/install_tensorrt.sh" || \
+         curl -fsSL --retry 5 --retry-delay 10 --max-time 1800 -o /tmp/install_tensorrt.sh "https://github.com/NVIDIA/TensorRT-LLM/raw/${GITHUB_TRTLLM_COMMIT}/docker/common/install_tensorrt.sh") && \
+        # Modify the script to use virtual environment pip instead of system pip3
+        sed -i 's/pip3 install/uv pip install/g' /tmp/install_tensorrt.sh && \
+        uv pip list && \
+        uv pip install patchelf===0.18.0 && \
+        bash /tmp/install_tensorrt.sh && \
+        uv pip list && \
+        # Install TensorRT-LLM wheel from the provided index URL, allow dependencies from PyPI (a failed install must fail the build)
+        uv pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" "${TENSORRTLLM_PIP_WHEEL}" && \
+        uv pip list; \
+    fi
+
+ENV TENSORRT_LIB_DIR=/usr/local/tensorrt/targets/${ARCH_ALT}-linux-gnu/lib
+ENV LD_LIBRARY_PATH=${TENSORRT_LIB_DIR}:${LD_LIBRARY_PATH}
+
+# Install the prebuilt dynamo runtime, dynamo and NIXL wheels, then the benchmarks package from its source tree (removed afterwards)
+COPY benchmarks/ /opt/dynamo/benchmarks/
+COPY --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
+RUN uv pip list && \
+    uv pip install \
+        /opt/dynamo/wheelhouse/ai_dynamo_runtime*cp312*.whl \
+        /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
+        /opt/dynamo/wheelhouse/nixl/nixl*.whl && \
+    uv pip list && \
+    cd /opt/dynamo/benchmarks && \
+    UV_GIT_LFS=1 uv pip install --no-cache . && \
+    uv pip list && \
+    cd - && \
+    rm -rf /opt/dynamo/benchmarks
+
+# Install common and test dependencies from the bind-mounted requirements files.
+# The package listing runs inside the same RUN as the install so it adds no extra debug-only image layers.
+RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
+    --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
+    UV_GIT_LFS=1 uv pip install \
+        --no-cache \
+        --requirement /tmp/requirements.txt \
+        --requirement /tmp/requirements.test.txt && \
+    uv pip list
+# Copy UCX libraries (the whole /opt/hpcx tree) from the framework stage; libucc.so is needed by pytorch. Copying only /opt/hpcx/ucc/ may be sufficient.
+COPY --from=framework /opt/hpcx /opt/hpcx
+# Prepend the UCC library directory to LD_LIBRARY_PATH; this is needed to make libucc.so visible so pytorch can use it.
+ENV LD_LIBRARY_PATH="/opt/hpcx/ucc/lib:${LD_LIBRARY_PATH}"
+# Copy the cusparseLt libraries from the framework stage. Might not be needed in the future once they are included in the DLFW cuda container.
+# networkx, packaging, setuptools get overridden by trtllm installation, so not copying them
+# pytorch-triton is copied after trtllm installation.
+COPY --from=framework /usr/local/cuda/lib64/libcusparseLt* /usr/local/cuda/lib64/
+
+# Copy tests, examples, benchmarks, deploy and components for CI
+COPY tests /workspace/tests
+COPY examples /workspace/examples
+COPY benchmarks /workspace/benchmarks
+COPY deploy /workspace/deploy
+COPY components/ /workspace/components/
+
+# Copy attribution and license files
+COPY ATTRIBUTION* LICENSE /workspace/
+# Install the launch banner: drop lines starting with "# " from launch_message.txt, print the result from ~/.bashrc, and activate the virtualenv in the same file
+RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
+    sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
+    echo "cat ~/.launch_screen" >> ~/.bashrc && \
+    echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc
+
+# Start through the NVIDIA entrypoint script; the empty CMD supplies no default arguments
+ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
+CMD []
+
+###########################################################
+########## Development (run.sh, runs as root user) ########
+###########################################################
+#
+# PURPOSE: Local development environment for use with run.sh (not Dev Container plug-in)
+#
+# This stage runs as root and provides:
+# - Development tools and utilities for local debugging
+# - Support for vscode/cursor development outside the Dev Container plug-in
+#
+# Use this stage if you need a full-featured development environment with extra tools,
+# but do not use it with the Dev Container plug-in.
+
+FROM runtime AS dev
+
+# Don't want ubuntu to be editable, just change uid and gid.
+ARG WORKSPACE_DIR=/workspace
+
+# Install developer utilities and build dependencies as root, then remove the apt package lists
+RUN apt-get update -y && \
+    apt-get install -y --no-install-recommends  \
+    # Interactive and debugging utilities
+    nvtop \
+    wget \
+    tmux \
+    vim \
+    git \
+    iproute2 \
+    rsync \
+    zip \
+    unzip \
+    htop \
+    # Build Dependencies
+    autoconf \
+    automake \
+    cmake \
+    libtool \
+    meson \
+    net-tools \
+    pybind11-dev \
+    # Rust build dependencies
+    clang \
+    libclang-dev \
+    protobuf-compiler && \
+    rm -rf /var/lib/apt/lists/*
+
+# Export the workspace, Rust toolchain, cargo target and virtualenv locations, and put cargo's bin directory first on PATH
+ENV WORKSPACE_DIR=${WORKSPACE_DIR} \
+    DYNAMO_HOME=${WORKSPACE_DIR} \
+    RUSTUP_HOME=/usr/local/rustup \
+    CARGO_HOME=/usr/local/cargo \
+    CARGO_TARGET_DIR=/workspace/target \
+    VIRTUAL_ENV=/opt/dynamo/venv \
+    PATH=/usr/local/cargo/bin:$PATH
+# Copy the rustup and cargo directories from the dynamo_base stage into the locations set in RUSTUP_HOME / CARGO_HOME
+COPY --from=dynamo_base /usr/local/rustup /usr/local/rustup
+COPY --from=dynamo_base /usr/local/cargo /usr/local/cargo
+
+# Install maturin (with its patchelf extra), for maturin develop. The package listing shares the RUN so it adds no extra layer.
+# The requirement is quoted so the shell can never treat [patchelf] as a glob pattern.
+RUN uv pip install "maturin[patchelf]" && \
+    uv pip list
+# Editable install of dynamo
+COPY pyproject.toml README.md hatch_build.py /workspace/
+# --no-deps installs only the project itself in editable mode, without resolving its dependencies
+RUN uv pip install --no-deps -e . && \
+    uv pip list
+CMD []
diff --git a/container/Dockerfile.vllm b/container/Dockerfile.vllm
index 3cb388c3fc..95c3d68322 100644
--- a/container/Dockerfile.vllm
+++ b/container/Dockerfile.vllm
@@ -3,21 +3,18 @@
 # SPDX-License-Identifier: Apache-2.0
 
 ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
-# TODO OPS-612: NCCL will hang with 25.03, so use 25.01 for now
-# Please check https://github.com/ai-dynamo/dynamo/pull/1065
-# for details and reproducer to manually test if the image
-# can be updated to later versions.
-ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
+ARG BASE_IMAGE_TAG="25.09-cuda13.0-devel-ubuntu24.04"
+ARG RELEASE_BUILD
 ARG ENABLE_KVBM=false
 ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
-ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
-ARG CUDA_VERSION="12.8"
+ARG RUNTIME_IMAGE_TAG="13.0.1-runtime-ubuntu24.04"
+ARG CUDA_VERSION="13.0"
 
 # Make sure to update the dependency version in pyproject.toml when updating this
-ARG VLLM_REF="v0.11.0"
+ARG VLLM_REF="v0.11.1rc2"
 # FlashInfer only respected when building vLLM from source, ie when VLLM_REF does not start with 'v' or for arm64 builds
-ARG FLASHINF_REF="v0.3.1"
-ARG TORCH_BACKEND="cu128"
+ARG FLASHINF_REF="v0.4.1"
+ARG TORCH_BACKEND="cu130"
 
 # If left blank, then we will fallback to vLLM defaults
 ARG DEEPGEMM_REF=""
@@ -81,6 +78,7 @@ RUN apt-get update -y \
         ibverbs-utils \
         libibumad-dev \
         libibverbs-dev \
+        libmlx5-1 \
         libnuma-dev \
         librdmacm-dev \
         rdma-core \
@@ -143,7 +141,13 @@ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
     export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
         cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
         chmod +x /tmp/install_vllm.sh && \
-        /tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt ${DEEPGEMM_REF:+--deepgemm-ref "$DEEPGEMM_REF"} ${FLASHINF_REF:+--flashinf-ref "$FLASHINF_REF"} --torch-backend $TORCH_BACKEND --cuda-version $CUDA_VERSION && \
+        /tmp/install_vllm.sh \
+            --editable --vllm-ref $VLLM_REF \
+            --max-jobs $MAX_JOBS --arch $ARCH \
+            --installation-dir /opt \
+            ${DEEPGEMM_REF:+--deepgemm-ref "$DEEPGEMM_REF"} \
+            ${FLASHINF_REF:+--flashinf-ref "$FLASHINF_REF"} \
+            --torch-backend $TORCH_BACKEND --cuda-version $CUDA_VERSION && \
         /tmp/use-sccache.sh show-stats "vLLM";
 
 ENV LD_LIBRARY_PATH=\
@@ -206,7 +210,7 @@ RUN apt-get update && \
         # prometheus dependencies
         ca-certificates \
         # DeepGemm uses 'cuobjdump' which does not come with CUDA image
-        cuda-command-line-tools-12-8 && \
+        cuda-command-line-tools-13-0 && \
     rm -rf /var/lib/apt/lists/*
 
 # Copy CUDA development tools (nvcc, headers, dependencies, etc.) from base devel image
diff --git a/container/build.sh b/container/build.sh
index 2bc2327cc0..4d3a1534a7 100755
--- a/container/build.sh
+++ b/container/build.sh
@@ -59,7 +59,7 @@ BUILD_CONTEXT=$(dirname "$(readlink -f "$SOURCE_DIR")")
 
 # Base Images
 TRTLLM_BASE_IMAGE=nvcr.io/nvidia/pytorch
-TRTLLM_BASE_IMAGE_TAG=25.06-py3
+TRTLLM_BASE_IMAGE_TAG=25.08-py3
 
 # Important Note: Because of ABI compatibility issues between TensorRT-LLM and NGC PyTorch,
 # we need to build the TensorRT-LLM wheel from source.
@@ -89,16 +89,17 @@ DEFAULT_TENSORRTLLM_PIP_WHEEL_DIR="/tmp/trtllm_wheel/"
 # TensorRT-LLM commit to use for building the trtllm wheel if not provided.
 # Important Note: This commit is not used in our CI pipeline. See the CI
 # variables to learn how to run a pipeline with a specific commit.
-DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT="0c9430e5a530ba958fc9dca561a3ad865ad9f492"
+DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT="736e7ee136e0d65f98704db13ab7e053803033c4" # tag v1.2.0rc1
 TRTLLM_COMMIT=""
 TRTLLM_USE_NIXL_KVCACHE_EXPERIMENTAL="0"
 TRTLLM_GIT_URL=""
 
 # TensorRT-LLM PyPI index URL
-DEFAULT_TENSORRTLLM_INDEX_URL="https://pypi.python.org/simple"
+DEFAULT_TENSORRTLLM_INDEX_URL="https://download.pytorch.org/whl/cu130" # PyTorch cu130 wheel index; NOTE(review): presumably passed to the image build as an extra index alongside PyPI - confirm
 # TODO: Remove the version specification from here and use the ai-dynamo[trtllm] package.
 # Need to update the Dockerfile.trtllm to use the ai-dynamo[trtllm] package.
-DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.1.0rc5"
+DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.2.0rc1"
+TENSORRTLLM_INDEX_URL="" # empty until overridden; NOTE(review): presumably falls back to DEFAULT_TENSORRTLLM_INDEX_URL - confirm
 TENSORRTLLM_PIP_WHEEL=""
 
 
@@ -107,13 +108,13 @@ VLLM_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
 # Please check https://github.com/ai-dynamo/dynamo/pull/1065
 # for details and reproducer to manually test if the image
 # can be updated to later versions.
-VLLM_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
+VLLM_BASE_IMAGE_TAG="25.09-cuda13.0-devel-ubuntu24.04"
 
 NONE_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
-NONE_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
+NONE_BASE_IMAGE_TAG="25.09-cuda13.0-devel-ubuntu24.04"
 
 SGLANG_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
-SGLANG_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
+SGLANG_BASE_IMAGE_TAG="25.09-cuda13.0-devel-ubuntu24.04"
 
 NIXL_REF=0.7.1
 NIXL_UCX_REF=v1.19.0
@@ -566,7 +567,7 @@ build_local_dev_with_header() {
         set -x
     fi
 
-    $RUN_PREFIX docker build \
+    $RUN_PREFIX docker build --progress=plain  \
         --build-arg DEV_BASE="$dev_base_image" \
         --build-arg USER_UID="$USER_UID" \
         --build-arg USER_GID="$USER_GID" \
@@ -848,15 +849,22 @@ if [[ -z "${DEV_IMAGE_INPUT:-}" ]]; then
         echo "======================================"
         echo "Starting Build 1: Base Image"
         echo "======================================"
-        $RUN_PREFIX docker build -f "${SOURCE_DIR}/Dockerfile" --target dev $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO --tag $DYNAMO_BASE_IMAGE $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE
+        # Build 1 (container/Dockerfile) does NOT use these build args (they will be removed soon):
+        #   - FRAMEWORK
+        #   - VLLM_FRAMEWORK (or TRTLLM_FRAMEWORK, SGLANG_FRAMEWORK, etc.)
+        #   - VERSION
+        #   - PYTHON_PACKAGE_VERSION
+        #   - HF_TOKEN
+        #   - MAX_JOBS
+        $RUN_PREFIX docker build --progress=plain -f "${SOURCE_DIR}/Dockerfile" --target dev $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO --tag $DYNAMO_BASE_IMAGE $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE
         # Start framework build
         echo "======================================"
         echo "Starting Build 2: Framework Image"
         echo "======================================"
         BUILD_ARGS+=" --build-arg DYNAMO_BASE_IMAGE=${DYNAMO_BASE_IMAGE}"
-        $RUN_PREFIX docker build -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE
+        $RUN_PREFIX docker build --progress=plain -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE
     else
-        $RUN_PREFIX docker build -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE
+        $RUN_PREFIX docker build --progress=plain -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE
     fi
 fi
 
diff --git a/container/deps/requirements.standard.txt b/container/deps/requirements.standard.txt
index 8c91855380..1ac1b14644 100644
--- a/container/deps/requirements.standard.txt
+++ b/container/deps/requirements.standard.txt
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-ucx-py-cu12
+ucx-py-cu13
diff --git a/container/deps/requirements.txt b/container/deps/requirements.txt
index f519852b45..9573373107 100644
--- a/container/deps/requirements.txt
+++ b/container/deps/requirements.txt
@@ -1,10 +1,10 @@
 # SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
-
-accelerate==1.6.0
-aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@5554d2eb8206738c66048bf2d72183e9bcd85759
+--extra-index-url https://download.pytorch.org/whl/cu130 # this is only needed for accelerate dependencies
+accelerate
+# TODO: aiconfigurator is disabled (commented out) here; restore or remove: aiconfigurator @ git+https://github.com/ai-dynamo/aiconfigurator.git@5554d2eb8206738c66048bf2d72183e9bcd85759
 aiofiles
-aiperf @ git+https://github.com/ai-dynamo/aiperf.git@4d3fa29403c8f75da22a14f1f7b3aeb27db9288f
+# TODO: aiperf is disabled (commented out) here; restore or remove: aiperf @ git+https://github.com/ai-dynamo/aiperf.git@4d3fa29403c8f75da22a14f1f7b3aeb27db9288f
 av==15.0.0
 fastapi==0.120.1
 ftfy
@@ -17,7 +17,7 @@ kubernetes_asyncio
 matplotlib
 msgspec
 mypy
-nvidia-ml-py==13.580.65
+nvidia-ml-py==13.580.82
 opentelemetry-api
 opentelemetry-sdk
 pip
@@ -33,8 +33,8 @@ PyYAML
 scikit-learn
 scipy<1.14.0  # Pin scipy version for pmdarima compatibility
 sentencepiece
-tensorboard==2.19.0
-tensorboardX==2.6.2.2
+tensorboard==2.20.0
+tensorboardX==2.6.4
 transformers
 types-aiofiles
 types-PyYAML
diff --git a/container/deps/trtllm/install_nixl.sh b/container/deps/trtllm/install_nixl.sh
index 4bc31d2e3d..d76ca1f548 100755
--- a/container/deps/trtllm/install_nixl.sh
+++ b/container/deps/trtllm/install_nixl.sh
@@ -23,11 +23,11 @@ set -ex
 
 GITHUB_URL="https://github.com"
 
-UCX_VERSION="v1.18.1"
+UCX_VERSION="v1.19.0" # suggested by Ovidiu Mara
 UCX_INSTALL_PATH="/usr/local/ucx/"
 CUDA_PATH="/usr/local/cuda"
 
-NIXL_COMMIT="16348080f5bdeb9fe6058a23be140cec020ef3f3"
+NIXL_COMMIT="9ada51f154cc3bedcf94b3a3fcdea6e9b4117284" # suggested by Ovidiu Mara
 
 UCX_REPO="https://github.com/openucx/ucx.git"
 NIXL_REPO="https://github.com/ai-dynamo/nixl.git"
diff --git a/container/deps/vllm/install_vllm.sh b/container/deps/vllm/install_vllm.sh
index 0ebbb58823..3dce9eaeef 100755
--- a/container/deps/vllm/install_vllm.sh
+++ b/container/deps/vllm/install_vllm.sh
@@ -13,7 +13,7 @@
 
 set -euo pipefail
 
-VLLM_REF="v0.11.0"
+VLLM_REF="v0.11.1rc2" # default ref when --vllm-ref is not given; keep in sync with VLLM_REF in container/Dockerfile.vllm
 
 # Basic Configurations
 ARCH=$(uname -m)
@@ -21,10 +21,10 @@ MAX_JOBS=16
 INSTALLATION_DIR=/tmp
 
 # VLLM and Dependency Configurations
-TORCH_BACKEND="cu128"
+TORCH_BACKEND="cu130"
 TORCH_CUDA_ARCH_LIST="9.0;10.0" # For EP Kernels
 DEEPGEMM_REF=""
-CUDA_VERSION="12.8" # For DEEPGEMM
+CUDA_VERSION="13.0" # For DEEPGEMM
 
 # These flags are applicable when installing vLLM from source code
 EDITABLE=true
@@ -146,6 +146,7 @@ else
     # VLLM_REF does not start with 'v' or amd64 - use git checkout path
     if [ "$ARCH" = "arm64" ]; then
 
+        # TODO: the comments below still describe torch 2.8.0 / 2.7.1+cu128; update them for the torch 2.9.0+cu130 pin installed further down
         # torch 2.8.0 doesn't have a aarch wheel for cu128, vLLM uses torch 2.8.0 nightly wheel builds to compile its aarch wheel against
         # nightly can be unstable so we will not use it here
         # for now we will use torch 2.7.1+cu128 but this requires a recompilation from source
@@ -154,7 +155,7 @@ else
 
         # Try to install specific PyTorch version first
         echo "Attempting to install pinned PyTorch nightly versions..."
-        if ! uv pip install torch==2.7.1+cu128 torchaudio==2.7.1 torchvision==0.22.1 --index-url https://download.pytorch.org/whl/cu128; then
+        if ! uv pip install torch==2.9.0+cu130 torchaudio==2.9.0+cu130 torchvision==0.24.0+cu130 --index-url https://download.pytorch.org/whl/cu130; then
             echo "Pinned versions failed"
             exit 1
         fi