Upgrade CANN version to 8.1.rc1 #747

Merged (1 commit) on May 5, 2025

.github/workflows/vllm_ascend_test.yaml (2 additions, 1 deletion)

@@ -61,7 +61,8 @@ jobs:
     name: vLLM Ascend test
     runs-on: ${{ matrix.os }}
     container:
-      image: quay.io/ascend/cann:8.0.0-910b-ubuntu22.04-py3.10
+      # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
+      image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
       env:
         HF_ENDPOINT: https://hf-mirror.com
         HF_TOKEN: ${{ secrets.HF_TOKEN }}

Dockerfile (1 addition, 1 deletion)

@@ -15,7 +15,7 @@
 # This file is a part of the vllm-ascend project.
 #

-FROM quay.io/ascend/cann:8.0.0-910b-ubuntu22.04-py3.10
+FROM quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10

 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG COMPILE_CUSTOM_KERNELS=1
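
To try this bump locally, the image can be rebuilt from the updated Dockerfile. A minimal sketch (the two build args are the `ARG`s declared above; the `vllm-ascend:local` tag and the repo-root build context are illustrative choices, not part of this change):

```bash
# Build with the CANN 8.1.rc1 base image; override the ARGs only if the
# defaults do not suit your environment.
docker build -f Dockerfile \
  --build-arg PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" \
  --build-arg COMPILE_CUSTOM_KERNELS=1 \
  -t vllm-ascend:local .
```
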
Dockerfile.openEuler (3 additions, 3 deletions)

@@ -15,7 +15,7 @@
 # This file is a part of the vllm-ascend project.
 #

-FROM quay.io/ascend/cann:8.0.0-910b-openeuler22.03-py3.10
+FROM quay.io/ascend/cann:8.1.rc1-910b-openeuler22.03-py3.10

 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG COMPILE_CUSTOM_KERNELS=1

@@ -30,7 +30,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}

 WORKDIR /workspace

-COPY . /vllm-workspace/vllm-ascend/
+COPY . /vllm-workspace/vllm-ascend/

 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git

@@ -53,4 +53,4 @@ RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
 RUN python3 -m pip install modelscope ray && \
     python3 -m pip cache purge

-CMD ["/bin/bash"]
+CMD ["/bin/bash"]

README.md (1 addition, 1 deletion)

@@ -37,7 +37,7 @@ By using vLLM Ascend plugin, popular open-source models, including Transformer-l
 - OS: Linux
 - Software:
   * Python >= 3.9, < 3.12
-  * CANN >= 8.0.0
+  * CANN >= 8.1.rc1
   * PyTorch >= 2.5.1, torch-npu >= 2.5.1
   * vLLM (the same version as vllm-ascend)

README.zh.md (2 additions, 2 deletions)

@@ -37,8 +37,8 @@ vLLM 昇腾插件 (`vllm-ascend`) 是一个由社区维护的让vLLM在Ascend NP
 - 硬件:Atlas 800I A2 Inference系列、Atlas A2 Training系列
 - 操作系统:Linux
 - 软件:
-  * Python >= 3.9
-  * CANN >= 8.0.RC2
+  * Python >= 3.9, < 3.12
+  * CANN >= 8.1.RC1
   * PyTorch >= 2.5.1, torch-npu >= 2.5.1
   * vLLM (与vllm-ascend版本一致)

docs/source/conf.py (1 addition, 1 deletion)

@@ -73,7 +73,7 @@
     'pip_vllm_ascend_version': "0.8.4rc2",
     'pip_vllm_version': "0.8.4",
     # CANN image tag
-    'cann_image_tag': "8.0.0-910b-ubuntu22.04-py3.10",
+    'cann_image_tag': "8.1.rc1-910b-ubuntu22.04-py3.10",
 }

 # Add any paths that contain templates here, relative to this directory.

docs/source/installation.md (11 additions, 14 deletions)

@@ -11,7 +11,7 @@ This document describes how to install vllm-ascend manually.

 | Software  | Supported version | Note                                   |
 |-----------|-------------------|----------------------------------------|
-| CANN      | >= 8.0.0          | Required for vllm-ascend and torch-npu |
+| CANN      | >= 8.1.rc1        | Required for vllm-ascend and torch-npu |
 | torch-npu | >= 2.5.1          | Required for vllm-ascend               |
 | torch     | >= 2.5.1          | Required for torch-npu and vllm        |
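
For the torch and torch-npu rows, a one-line probe can confirm what is already installed (a sketch; it assumes both packages are present and that torch-npu's import name is `torch_npu`):

```bash
# Print installed torch / torch-npu versions; both should satisfy >= 2.5.1.
python3 -c "import torch, torch_npu; print('torch', torch.__version__, 'torch-npu', torch_npu.__version__)"
```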

@@ -69,10 +69,6 @@ docker run --rm \
 :animate: fade-in-slide-down
 You can also install CANN manually:

-```{note}
-This guide takes aarch64 as an example. If you run on x86, you need to replace `aarch64` with `x86_64` for the package name shown below.
-```
-
 ```bash
 # Create a virtual environment
 python -m venv vllm-ascend-env

@@ -82,19 +78,19 @@ source vllm-ascend-env/bin/activate
 pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple attrs 'numpy<2.0.0' decorator sympy cffi pyyaml pathlib2 psutil protobuf scipy requests absl-py wheel typing_extensions

 # Download and install the CANN package.
-wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.0.0/Ascend-cann-toolkit_8.0.0_linux-aarch64.run
-chmod +x ./Ascend-cann-toolkit_8.0.0_linux-aarch64.run
-./Ascend-cann-toolkit_8.0.0_linux-aarch64.run --full
+wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.1.RC1/Ascend-cann-toolkit_8.1.RC1_linux-"$(uname -i)".run
+chmod +x ./Ascend-cann-toolkit_8.1.RC1_linux-"$(uname -i)".run
+./Ascend-cann-toolkit_8.1.RC1_linux-"$(uname -i)".run --full

 source /usr/local/Ascend/ascend-toolkit/set_env.sh

-wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.0.0/Ascend-cann-kernels-910b_8.0.0_linux-aarch64.run
-chmod +x ./Ascend-cann-kernels-910b_8.0.0_linux-aarch64.run
-./Ascend-cann-kernels-910b_8.0.0_linux-aarch64.run --install
+wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.1.RC1/Ascend-cann-kernels-910b_8.1.RC1_linux-"$(uname -i)".run
+chmod +x ./Ascend-cann-kernels-910b_8.1.RC1_linux-"$(uname -i)".run
+./Ascend-cann-kernels-910b_8.1.RC1_linux-"$(uname -i)".run --install

-wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.0.0/Ascend-cann-nnal_8.0.0_linux-aarch64.run
-chmod +x ./Ascend-cann-nnal_8.0.0_linux-aarch64.run
-./Ascend-cann-nnal_8.0.0_linux-aarch64.run --install
+wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.1.RC1/Ascend-cann-nnal_8.1.RC1_linux-"$(uname -i)".run
+chmod +x ./Ascend-cann-nnal_8.1.RC1_linux-"$(uname -i)".run
+./Ascend-cann-nnal_8.1.RC1_linux-"$(uname -i)".run --install

 source /usr/local/Ascend/nnal/atb/set_env.sh
 ```
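
Once the three `.run` installers above have finished, a quick check can confirm the toolkit landed under the default prefix (a sketch assuming the default `/usr/local/Ascend` install location; `ASCEND_TOOLKIT_HOME` is assumed to be one of the variables exported by `set_env.sh`):

```bash
ls /usr/local/Ascend/ascend-toolkit/          # expect a version entry such as 8.1.RC1 or a "latest" symlink
source /usr/local/Ascend/ascend-toolkit/set_env.sh
echo "$ASCEND_TOOLKIT_HOME"                   # assumed to be exported by set_env.sh; should point at the toolkit
```
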
@@ -223,6 +219,7 @@ docker run --rm \
   -it $IMAGE bash
 ```

+The default working directory is `/workspace`. The vLLM and vLLM Ascend code is placed in `/vllm-workspace` and installed in [development mode](https://setuptools.pypa.io/en/latest/userguide/development_mode.html) (`pip install -e`), so code changes take effect immediately without requiring a reinstall.
 ::::

 :::::
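
The development-mode note added above means the installed package points straight at the source tree, so edits there apply on the next run. A minimal sketch of what that looks like inside the container (assuming the `/vllm-workspace` layout described in the note and that the plugin's import name is `vllm_ascend`):

```bash
cd /vllm-workspace/vllm-ascend
# With an editable install, the import resolves to this source tree.
python3 -c "import vllm_ascend; print(vllm_ascend.__file__)"
# Edit files here, then simply re-run your workload; no reinstall is needed.
```
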
docs/source/quick_start.md (2 additions, 0 deletions)

@@ -62,6 +62,8 @@ docker run --rm \
 ::::
 :::::

+The default working directory is `/workspace`. The vLLM and vLLM Ascend code is placed in `/vllm-workspace` and installed in [development mode](https://setuptools.pypa.io/en/latest/userguide/development_mode.html) (`pip install -e`), so code changes take effect immediately without requiring a reinstall.
+
 ## Usage

 You can use Modelscope mirror to speed up download:
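
The download command itself is cut off in this view; for reference, vLLM-based setups typically route model downloads through ModelScope with the `VLLM_USE_MODELSCOPE` environment variable (an assumption here, not taken from the truncated text):

```bash
# Assumed switch: route model downloads through the ModelScope mirror.
export VLLM_USE_MODELSCOPE=True
```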