Commit 35fdb50

[Test] Add accuracy nightly test for new models
Signed-off-by: hfadzxy <[email protected]>
1 parent d5fef22 · commit 35fdb50

12 files changed (+109 −4 lines)

.github/workflows/_e2e_nightly_single_node_models.yaml

Lines changed: 8 additions & 1 deletion
@@ -59,7 +59,7 @@ jobs:
     name: ${{inputs.model_list}} accuracy test
     runs-on: ${{ inputs.runner }}
     container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
+      image: "${{ inputs.image }}"
       env:
         VLLM_USE_MODELSCOPE: True
         GHA_VLLM_ASCEND_VERSION: ${{ inputs.vllm-ascend }}
@@ -114,6 +114,12 @@ jobs:
           . /usr/local/Ascend/8.3.RC1/bisheng_toolkit/set_env.sh
           python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev20250914-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl"

+      - name: Install tensorflow (for Molmo-7B-D-0924)
+        if: ${{ inputs.runner == 'linux-aarch64-a2-1' && contains(inputs.model_list, 'Molmo-7B-D-0924') }}
+        shell: bash -l {0}
+        run: |
+          pip install tensorflow --no-cache-dir
+
       - name: Resolve vllm-ascend version
         run: |
           VERSION_INPUT="${{ inputs.vllm-ascend }}"
@@ -175,6 +181,7 @@ jobs:
         id: report
         env:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
+          HF_DATASETS_OFFLINE: True
           VLLM_USE_MODELSCOPE: True
           VLLM_CI_RUNNER: ${{ inputs.runner }}
           VLLM_VERSION: ${{ env.GHA_VLLM_VERSION }}
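Note on the image change above: the hard-coded CANN image is replaced with a caller-supplied input. The top of this reusable workflow is not part of the diff, so the following is only a minimal sketch of the workflow_call input that ${{ inputs.image }} assumes; the description text and required setting are illustrative rather than copied from the repository.

on:
  workflow_call:
    inputs:
      image:
        description: "Container image the accuracy tests run in (illustrative declaration)"
        required: true
        type: string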

.github/workflows/vllm_ascend_test_nightly_a2.yaml

Lines changed: 11 additions & 1 deletion
@@ -114,6 +114,16 @@ jobs:
              - Qwen3-VL-8B-Instruct
              - Qwen2.5-Omni-7B
              - Meta-Llama-3.1-8B-Instruct
+          - os: linux-aarch64-a2-1
+            model_list:
+              - ERNIE-4.5-0.3B-PT
+              - gemma-2-9b-it
+              - gemma-3-4b-it
+              - internlm-7b
+              - MiniCPM-2B-dpo-bf16
+              - InternVL3_5-8B-hf
+              - llava-1.5-7b-hf
+              - Molmo-7B-D-0924
          - os: linux-aarch64-a2-2
            model_list:
              - Qwen3-30B-A3B
@@ -128,5 +138,5 @@ jobs:
      vllm: v0.11.0
      runner: ${{ matrix.test_config.os }}
      model_list: ${{ toJson(matrix.test_config.model_list) }}
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
+      image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11'
      upload: false
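For orientation, the two hunks above sit inside a matrix job that calls the reusable workflow from the first file in this commit. Neither the matrix declaration nor the uses: line appears in the diff; the sketch below shows the assumed surrounding structure, with the job name and exact nesting being illustrative only.

jobs:
  accuracy-tests:
    strategy:
      matrix:
        test_config:
          - os: linux-aarch64-a2-1
            model_list:
              - ERNIE-4.5-0.3B-PT
              # ... remaining entries as added in the hunk above
    uses: ./.github/workflows/_e2e_nightly_single_node_models.yaml
    with:
      vllm: v0.11.0
      runner: ${{ matrix.test_config.os }}
      model_list: ${{ toJson(matrix.test_config.model_list) }}
      image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11'
      upload: false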
tests/e2e/models/configs/ERNIE-4.5-0.3B-PT.yaml

Lines changed: 9 additions & 0 deletions

@@ -0,0 +1,9 @@
+model_name: "PaddlePaddle/ERNIE-4.5-0.3B-PT"
+hardware: "Atlas A2 Series"
+tasks:
+  - name: "gsm8k"
+    metrics:
+      - name: "exact_match,flexible-extract"
+        value: 0.25
+num_fewshot: 5
+trust_remote_code: True
tests/e2e/models/configs/MiniCPM-2B-dpo-bf16.yaml

Lines changed: 10 additions & 0 deletions

@@ -0,0 +1,10 @@
+model_name: "OpenBMB/MiniCPM-2B-dpo-bf16"
+hardware: "Atlas A2 Series"
+tasks:
+  - name: "ceval-valid"
+    metrics:
+      - name: "acc,none"
+        value: 0.5
+num_fewshot: 5
+trust_remote_code: True
+gpu_memory_utilization: 0.8
tests/e2e/models/configs/Molmo-7B-D-0924.yaml

Lines changed: 13 additions & 0 deletions

@@ -0,0 +1,13 @@
+model_name: "LLM-Research/Molmo-7B-D-0924"
+hardware: "Atlas A2 Series"
+model: "vllm-vlm"
+tasks:
+  - name: "ceval-valid"
+    metrics:
+      - name: "acc,none"
+        value: 0.71
+max_model_len: 4096
+trust_remote_code: True
+apply_chat_template: False
+fewshot_as_multiturn: False
+gpu_memory_utilization: 0.8
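All of the new per-model configs in this commit share the same shape. The annotated composite below pulls together the fields seen across them, assuming they are consumed by an lm-eval-style accuracy harness backed by vLLM; the comments are interpretive, and this particular combination of fields is illustrative rather than copied from any single file.

model_name: "LLM-Research/Molmo-7B-D-0924"  # ModelScope repo id (the workflow sets VLLM_USE_MODELSCOPE)
hardware: "Atlas A2 Series"                 # hardware the expected values were collected on
model: "vllm-vlm"                           # backend selector; only the vision-language configs set this
tasks:
  - name: "ceval-valid"                     # evaluation task (gsm8k or ceval-valid in this commit)
    metrics:
      - name: "acc,none"                    # metric key as reported by the harness
        value: 0.71                         # expected accuracy the nightly result is compared against
num_fewshot: 5                              # few-shot examples per prompt
max_model_len: 4096                         # engine-side settings, presumably forwarded to vLLM
dtype: "bfloat16"
trust_remote_code: True
gpu_memory_utilization: 0.8
apply_chat_template: False                  # prompting options on the evaluation side
fewshot_as_multiturn: False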

tests/e2e/models/configs/accuracy.txt

Lines changed: 8 additions & 1 deletion
@@ -9,4 +9,11 @@ Qwen3-VL-30B-A3B-Instruct.yaml
 Qwen3-VL-8B-Instruct.yaml
 Qwen2.5-Omni-7B.yaml
 Meta-Llama-3.1-8B-Instruct.yaml
-InternVL3_5-8B.yaml
+InternVL3_5-8B.yaml
+ERNIE-4.5-0.3B-PT.yaml
+MiniCPM-2B-dpo-bf16.yaml
+gemma-2-9b-it.yaml
+gemma-3-4b-it.yaml
+internlm-7b.yaml
+Molmo-7B-D-0924.yaml
+llava-1.5-7b-hf.yaml
tests/e2e/models/configs/gemma-2-9b-it.yaml

Lines changed: 11 additions & 0 deletions

@@ -0,0 +1,11 @@
+model_name: "LLM-Research/gemma-2-9b-it"
+hardware: "Atlas A2 Series"
+tasks:
+  - name: "gsm8k"
+    metrics:
+      - name: "exact_match,strict-match"
+        value: 0.46
+      - name: "exact_match,flexible-extract"
+        value: 0.79
+num_fewshot: 5
+gpu_memory_utilization: 0.8
tests/e2e/models/configs/gemma-3-4b-it.yaml

Lines changed: 13 additions & 0 deletions

@@ -0,0 +1,13 @@
+model_name: "LLM-Research/gemma-3-4b-it"
+hardware: "Atlas A2 Series"
+tasks:
+  - name: "gsm8k"
+    metrics:
+      - name: "exact_match,strict-match"
+        value: 0.59
+      - name: "exact_match,flexible-extract"
+        value: 0.59
+num_fewshot: 5
+apply_chat_template: False
+fewshot_as_multiturn: False
+gpu_memory_utilization: 0.7
tests/e2e/models/configs/internlm-7b.yaml

Lines changed: 13 additions & 0 deletions

@@ -0,0 +1,13 @@
+model_name: "Shanghai_AI_Laboratory/internlm-7b"
+hardware: "Atlas A2 Series"
+tasks:
+  - name: "ceval-valid"
+    metrics:
+      - name: "acc,none"
+        value: 0.42
+num_fewshot: 5
+max_model_len: 2048
+trust_remote_code: True
+dtype: "bfloat16"
+apply_chat_template: False
+fewshot_as_multiturn: False
