diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml index d0d1adbb1..b49e52719 100644 --- a/.github/workflows/vllm_ascend_test.yaml +++ b/.github/workflows/vllm_ascend_test.yaml @@ -61,7 +61,8 @@ jobs: name: vLLM Ascend test runs-on: ${{ matrix.os }} container: - image: quay.io/ascend/cann:8.0.0-910b-ubuntu22.04-py3.10 + # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready + image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10 env: HF_ENDPOINT: https://hf-mirror.com HF_TOKEN: ${{ secrets.HF_TOKEN }} diff --git a/Dockerfile b/Dockerfile index a3a3fe945..d01b98840 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,7 +15,7 @@ # This file is a part of the vllm-ascend project. # -FROM quay.io/ascend/cann:8.0.0-910b-ubuntu22.04-py3.10 +FROM quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" ARG COMPILE_CUSTOM_KERNELS=1 diff --git a/Dockerfile.openEuler b/Dockerfile.openEuler index 3fde5aa25..3e3d12712 100644 --- a/Dockerfile.openEuler +++ b/Dockerfile.openEuler @@ -15,7 +15,7 @@ # This file is a part of the vllm-ascend project. # -FROM quay.io/ascend/cann:8.0.0-910b-openeuler22.03-py3.10 +FROM quay.io/ascend/cann:8.1.rc1-910b-openeuler22.03-py3.10 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" ARG COMPILE_CUSTOM_KERNELS=1 @@ -30,7 +30,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL} WORKDIR /workspace -COPY . /vllm-workspace/vllm-ascend/ +COPY . /vllm-workspace/vllm-ascend/ # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git @@ -53,4 +53,4 @@ RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh && \ RUN python3 -m pip install modelscope ray && \ python3 -m pip cache purge -CMD ["/bin/bash"] \ No newline at end of file +CMD ["/bin/bash"] diff --git a/README.md b/README.md index f16f77113..9cbaf51bd 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ By using vLLM Ascend plugin, popular open-source models, including Transformer-l - OS: Linux - Software: * Python >= 3.9, < 3.12 - * CANN >= 8.0.0 + * CANN >= 8.1.rc1 * PyTorch >= 2.5.1, torch-npu >= 2.5.1 * vLLM (the same version as vllm-ascend) diff --git a/README.zh.md b/README.zh.md index 5bb9b1e75..6adb0e183 100644 --- a/README.zh.md +++ b/README.zh.md @@ -37,8 +37,8 @@ vLLM 昇腾插件 (`vllm-ascend`) 是一个由社区维护的让vLLM在Ascend NP - 硬件:Atlas 800I A2 Inference系列、Atlas A2 Training系列 - 操作系统:Linux - 软件: - * Python >= 3.9 - * CANN >= 8.0.RC2 + * Python >= 3.9, < 3.12 + * CANN >= 8.1.RC1 * PyTorch >= 2.5.1, torch-npu >= 2.5.1 * vLLM (与vllm-ascend版本一致) diff --git a/docs/source/conf.py b/docs/source/conf.py index 5d8f199cd..1dc45f783 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -73,7 +73,7 @@ 'pip_vllm_ascend_version': "0.8.4rc2", 'pip_vllm_version': "0.8.4", # CANN image tag - 'cann_image_tag': "8.0.0-910b-ubuntu22.04-py3.10", + 'cann_image_tag': "8.1.rc1-910b-ubuntu22.04-py3.10", } # Add any paths that contain templates here, relative to this directory. diff --git a/docs/source/installation.md b/docs/source/installation.md index cb63a4bad..6314e31ad 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -11,7 +11,7 @@ This document describes how to install vllm-ascend manually.
| Software | Supported version | Note | |-----------|-------------------|----------------------------------------| - | CANN | >= 8.0.0 | Required for vllm-ascend and torch-npu | + | CANN | >= 8.1.rc1 | Required for vllm-ascend and torch-npu | | torch-npu | >= 2.5.1 | Required for vllm-ascend | | torch | >= 2.5.1 | Required for torch-npu and vllm | @@ -69,10 +69,6 @@ docker run --rm \ :animate: fade-in-slide-down You can also install CANN manually: -```{note} -This guide takes aarch64 as an example. If you run on x86, you need to replace `aarch64` with `x86_64` for the package name shown below. -``` - ```bash # Create a virtual environment python -m venv vllm-ascend-env @@ -82,19 +78,19 @@ source vllm-ascend-env/bin/activate pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple attrs 'numpy<2.0.0' decorator sympy cffi pyyaml pathlib2 psutil protobuf scipy requests absl-py wheel typing_extensions # Download and install the CANN package. -wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.0.0/Ascend-cann-toolkit_8.0.0_linux-aarch64.run -chmod +x ./Ascend-cann-toolkit_8.0.0_linux-aarch64.run -./Ascend-cann-toolkit_8.0.0_linux-aarch64.run --full +wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.1.RC1/Ascend-cann-toolkit_8.1.RC1_linux-"$(uname -i)".run +chmod +x ./Ascend-cann-toolkit_8.1.RC1_linux-"$(uname -i)".run +./Ascend-cann-toolkit_8.1.RC1_linux-"$(uname -i)".run --full source /usr/local/Ascend/ascend-toolkit/set_env.sh -wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.0.0/Ascend-cann-kernels-910b_8.0.0_linux-aarch64.run -chmod +x ./Ascend-cann-kernels-910b_8.0.0_linux-aarch64.run -./Ascend-cann-kernels-910b_8.0.0_linux-aarch64.run --install +wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.1.RC1/Ascend-cann-kernels-910b_8.1.RC1_linux-"$(uname -i)".run +chmod +x ./Ascend-cann-kernels-910b_8.1.RC1_linux-"$(uname -i)".run +./Ascend-cann-kernels-910b_8.1.RC1_linux-"$(uname -i)".run --install -wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.0.0/Ascend-cann-nnal_8.0.0_linux-aarch64.run -chmod +x ./Ascend-cann-nnal_8.0.0_linux-aarch64.run -./Ascend-cann-nnal_8.0.0_linux-aarch64.run --install +wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.1.RC1/Ascend-cann-nnal_8.1.RC1_linux-"$(uname -i)".run +chmod +x ./Ascend-cann-nnal_8.1.RC1_linux-"$(uname -i)".run +./Ascend-cann-nnal_8.1.RC1_linux-"$(uname -i)".run --install source /usr/local/Ascend/nnal/atb/set_env.sh ``` @@ -223,6 +219,7 @@ docker run --rm \ -it $IMAGE bash ``` +The default workdir is `/workspace`; vLLM and vLLM Ascend code are placed in `/vllm-workspace` and installed in [development mode](https://setuptools.pypa.io/en/latest/userguide/development_mode.html) (`pip install -e`) so that developers can immediately apply code changes without requiring a new installation. :::: ::::: diff --git a/docs/source/quick_start.md b/docs/source/quick_start.md index 76edd3894..7ed2ae9ab 100644 --- a/docs/source/quick_start.md +++ b/docs/source/quick_start.md @@ -62,6 +62,8 @@ docker run --rm \ :::: ::::: +The default workdir is `/workspace`; vLLM and vLLM Ascend code are placed in `/vllm-workspace` and installed in [development mode](https://setuptools.pypa.io/en/latest/userguide/development_mode.html) (`pip install -e`) so that developers can immediately apply code changes without requiring a new installation. + ## Usage You can use Modelscope mirror to speed up download: