[ci] add test workflow for 3090 machine #3561

Merged · 33 commits · May 26, 2025
391 changes: 391 additions & 0 deletions .github/workflows/daily_ete_test_3090.yml

Large diffs are not rendered by default.

77 changes: 77 additions & 0 deletions autotest/config-3090.yaml
@@ -0,0 +1,77 @@
model_path: /nvme/qa_test_models
resource_path: /nvme/qa_test_models/resource
dst_path: /nvme/qa_test_models/autotest_model
log_path: /nvme/qa_test_models/autotest_model/log
benchmark_path: /nvme/qa_test_models/benchmark-reports
dataset_path: /nvme/qa_test_models/datasets/ShareGPT_V3_unfiltered_cleaned_split.json
env_tag: 3090

tp_config:
empty: 2

turbomind_chat_model:
- meta-llama/Llama-3.2-3B-Instruct
- internlm/internlm3-8b-instruct
- OpenGVLab/InternVL3-8B
- OpenGVLab/InternVL2_5-1B
- Qwen/Qwen3-8B
- Qwen/Qwen2.5-7B-Instruct

pytorch_chat_model:
- meta-llama/Llama-3.2-3B-Instruct
- internlm/internlm3-8b-instruct
- OpenGVLab/InternVL3-8B
- OpenGVLab/InternVL2_5-1B
- Qwen/Qwen3-8B
- Qwen/Qwen2.5-7B-Instruct
- Qwen/Qwen2.5-VL-7B-Instruct

turbomind_vl_model:
- OpenGVLab/InternVL3-8B
- OpenGVLab/InternVL2_5-1B

pytorch_vl_model:
- OpenGVLab/InternVL3-8B
- OpenGVLab/InternVL2_5-1B
- Qwen/Qwen2.5-VL-7B-Instruct

turbomind_base_model:
- internlm/internlm3-8b-instruct
- Qwen/Qwen3-8B

pytorch_base_model:
- internlm/internlm3-8b-instruct
- Qwen/Qwen3-8B

turbomind_quatization:
no_awq:
- OpenGVLab/InternVL3-8B
gptq:
- empty
no_kvint4:
- OpenGVLab/InternVL3-8B
- OpenGVLab/InternVL2_5-1B
- Qwen/Qwen3-8B
- Qwen/Qwen2.5-7B-Instruct
- Qwen/Qwen2.5-VL-7B-Instruct
no_kvint8:
- deepseek-ai/DeepSeek-V2-Chat

pytorch_quatization:
awq:
- meta-llama/Llama-3.2-3B-Instruct
- internlm/internlm3-8b-instruct
- OpenGVLab/InternVL2_5-1B
- Qwen/Qwen3-8B
- Qwen/Qwen2.5-7B-Instruct
w8a8:
- meta-llama/Llama-3.2-3B-Instruct
- internlm/internlm3-8b-instruct
no_kvint4:
- OpenGVLab/InternVL3-8B
- OpenGVLab/InternVL2_5-1B
- Qwen/Qwen3-8B
- Qwen/Qwen2.5-7B-Instruct
- Qwen/Qwen2.5-VL-7B-Instruct
no_kvint8:
- deepseek-ai/DeepSeek-V2-Lite-Chat
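
For context, here is a hypothetical sketch of how a test utility might resolve model lists from this config; the real resolution logic lives in the autotest helpers and may differ, but it shows how the per-backend chat-model lists interact with the `no_kvint4`/`no_kvint8` exclusion lists:

```python
# Hypothetical helper, not the repository's actual code.
import yaml


def load_model_list(config_path, backend, quant_policy=None):
    with open(config_path) as f:
        cfg = yaml.safe_load(f)
    models = cfg.get(f'{backend}_chat_model', [])
    if quant_policy in (4, 8):
        # 'quatization' deliberately matches the key spelling used in the config
        excluded = cfg.get(f'{backend}_quatization', {}).get(f'no_kvint{quant_policy}', [])
        models = [m for m in models if m not in excluded]
    return models


# On this config, kv-int4 on turbomind leaves only the two dense LLMs:
print(load_model_list('autotest/config-3090.yaml', 'turbomind', quant_policy=4))
# ['meta-llama/Llama-3.2-3B-Instruct', 'internlm/internlm3-8b-instruct']
```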
1 change: 1 addition & 0 deletions autotest/config.yaml
Original file line number Diff line number Diff line change
@@ -4,6 +4,7 @@ dst_path: /nvme/qa_test_models/autotest_model
log_path: /nvme/qa_test_models/autotest_model/log
benchmark_path: /nvme/qa_test_models/benchmark-reports
dataset_path: /nvme/qa_test_models/datasets/ShareGPT_V3_unfiltered_cleaned_split.json
env_tag: a100

tp_config:
internlm2-chat-20b: 2
4 changes: 4 additions & 0 deletions autotest/tools/chat/test_command_chat_hf_pytorch.py
Original file line number Diff line number Diff line change
@@ -10,6 +10,7 @@
@pytest.mark.usefixtures('cli_case_config')
@pytest.mark.hf_pytorch_chat
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.parametrize('model', get_torch_model_list(tp_num=1))
def test_hf_pytorch_chat_tp1(config, model, cli_case_config, worker_id):
usercase = 'chat_testcase'
@@ -69,6 +70,7 @@ def test_hf_pytorch_chat_tp4(config, model, cli_case_config, worker_id):
@pytest.mark.usefixtures('cli_case_config')
@pytest.mark.hf_pytorch_chat
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.parametrize('model', get_torch_model_list(tp_num=1, quant_policy=4))
def test_hf_pytorch_chat_kvin4_tp1(config, model, cli_case_config, worker_id):
usercase = 'chat_testcase'
@@ -131,6 +133,7 @@ def test_hf_pytorch_chat_kvin4_tp4(config, model, cli_case_config, worker_id):
@pytest.mark.usefixtures('cli_case_config')
@pytest.mark.hf_pytorch_chat
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.parametrize('model', get_torch_model_list(tp_num=1, quant_policy=8))
def test_hf_pytorch_chat_kvin8_tp1(config, model, cli_case_config, worker_id):
usercase = 'chat_testcase'
@@ -192,6 +195,7 @@ def test_hf_pytorch_chat_kvin8_tp4(config, model, cli_case_config, worker_id):
@pytest.mark.order(10)
@pytest.mark.usefixtures('cli_case_config')
@pytest.mark.hf_pytorch_chat
@pytest.mark.test_3090
@pytest.mark.gpu_num_1
@pytest.mark.parametrize('model', get_torch_model_list(tp_num=1, model_type='base_model'))
def test_hf_pytorch_base_tp1(config, model, cli_case_config, worker_id):
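
The new `test_3090` mark is how the 3090 workflow carves a 1-GPU subset out of the full suite. As a minimal sketch, assuming the mark is not already declared in pytest.ini, it could be registered in a conftest.py hook like this (unregistered marks trigger PytestUnknownMarkWarning):

```python
# conftest.py sketch; the repository may instead declare the mark in
# pytest.ini, in which case this hook is unnecessary.
def pytest_configure(config):
    config.addinivalue_line(
        'markers', 'test_3090: cases selected for the RTX 3090 CI machine')
```

The workflow can then select just this subset with an invocation along the lines of `pytest -m "test_3090 and gpu_num_1" autotest/`.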
4 changes: 4 additions & 0 deletions autotest/tools/chat/test_command_chat_hf_turbomind.py
Original file line number Diff line number Diff line change
@@ -10,6 +10,7 @@
@pytest.mark.usefixtures('cli_case_config')
@pytest.mark.hf_turbomind_chat
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.parametrize('model', get_turbomind_model_list(tp_num=1))
@pytest.mark.parametrize('communicator', get_communicator_list())
def test_hf_turbomind_chat_tp1(config, model, communicator, cli_case_config, worker_id):
@@ -78,6 +79,7 @@ def test_hf_turbomind_chat_tp4(config, model, communicator, cli_case_config, worker_id):
@pytest.mark.usefixtures('cli_case_config')
@pytest.mark.hf_turbomind_chat
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.parametrize('model', get_turbomind_model_list(tp_num=1, quant_policy=4))
@pytest.mark.parametrize('communicator', get_communicator_list())
def test_hf_turbomind_chat_kvint4_tp1(config, model, communicator, cli_case_config, worker_id):
@@ -146,6 +148,7 @@ def test_hf_turbomind_chat_kvint4_tp4(config, model, communicator, cli_case_config, worker_id):
@pytest.mark.usefixtures('cli_case_config')
@pytest.mark.hf_turbomind_chat
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.parametrize('model', get_turbomind_model_list(tp_num=1, quant_policy=8))
@pytest.mark.parametrize('communicator', get_communicator_list())
def test_hf_turbomind_chat_kvint8_tp1(config, model, communicator, cli_case_config, worker_id):
@@ -314,6 +317,7 @@ def test_hf_turbomind_chat_fallback_backend_kvint8_tp2(config, model, communicator, cli_case_config, worker_id):
@pytest.mark.usefixtures('cli_case_config')
@pytest.mark.hf_turbomind_chat
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.parametrize('model', get_turbomind_model_list(tp_num=1, model_type='base_model'))
@pytest.mark.parametrize('communicator', get_communicator_list())
def test_hf_turbomind_base_tp1(config, model, communicator, cli_case_config, worker_id):
5 changes: 4 additions & 1 deletion autotest/tools/pipeline/llm_case.py
@@ -7,7 +7,7 @@
from lmdeploy import GenerationConfig, PytorchEngineConfig, TurbomindEngineConfig, pipeline
from lmdeploy.utils import is_bf16_supported

- gen_config = GenerationConfig(max_new_tokens=500)
+ gen_config = GenerationConfig(max_new_tokens=500, min_new_tokens=2)


def run_pipeline_chat_test(model_path, cases_path, tp, backend_type, is_pr_test, extra: object = None):
@@ -24,6 +24,9 @@ def run_pipeline_chat_test(model_path, cases_path, tp, backend_type, is_pr_test, extra: object = None):
if 'turbomind' in backend_type and extra is not None and 'communicator' in extra:
backend_config.communicator = extra.get('communicator')

if extra is not None and 'cache-max-entry-count' in extra and extra.get('cache-max-entry-count') is not None:
backend_config.cache_max_entry_count = extra.get('cache-max-entry-count')

if 'w4' in model_path or ('4bits' in model_path or 'awq' in model_path.lower()):
backend_config.model_format = 'awq'
if 'gptq' in model_path.lower():
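
The `cache-max-entry-count` plumbing above is what lets the 3090 job shrink the kv cache to fit a 24 GB card: `cache_max_entry_count` is the fraction of free GPU memory (after weights are loaded) that the engine reserves for kv cache. A minimal sketch of what the option controls; the 0.7 ratio and the model are illustrative, not values mandated by this PR:

```python
from lmdeploy import GenerationConfig, TurbomindEngineConfig, pipeline

backend_config = TurbomindEngineConfig(
    tp=1,
    cache_max_entry_count=0.7,  # fraction of post-weight free GPU memory kept for kv cache
)
gen_config = GenerationConfig(max_new_tokens=500, min_new_tokens=2)

pipe = pipeline('internlm/internlm3-8b-instruct', backend_config=backend_config)
print(pipe(['Hi, please introduce yourself'], gen_config=gen_config))
```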
5 changes: 4 additions & 1 deletion autotest/tools/pipeline/mllm_case.py
@@ -10,7 +10,7 @@
from lmdeploy.vl.constants import IMAGE_TOKEN
from lmdeploy.vl.utils import encode_image_base64

- gen_config = GenerationConfig(max_new_tokens=500)
+ gen_config = GenerationConfig(max_new_tokens=500, min_new_tokens=2)

PIC1 = 'tiger.jpeg'
PIC2 = 'human-pose.jpg'
@@ -33,6 +33,9 @@ def run_pipeline_mllm_test(model_path, resource_path, tp, backend_type, is_pr_test, extra: object = None):
if 'turbomind' in backend_type and extra is not None and 'communicator' in extra:
backend_config.communicator = extra.get('communicator')

if extra is not None and 'cache-max-entry-count' in extra and extra.get('cache-max-entry-count') is not None:
backend_config.cache_max_entry_count = extra.get('cache-max-entry-count')

if 'w4' in model_path or ('4bits' in model_path or 'awq' in model_path.lower()):
backend_config.model_format = 'awq'
if not is_bf16_supported():
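
mllm_case.py drives the vision-language models through the same pipeline API, passing (prompt, image) tuples. A hedged sketch of that path; the image file mirrors PIC1 above and the model choice is illustrative:

```python
from lmdeploy import GenerationConfig, pipeline
from lmdeploy.vl import load_image

pipe = pipeline('OpenGVLab/InternVL2_5-1B')
image = load_image('tiger.jpeg')  # placeholder; the suite loads it from resource_path
response = pipe(('describe this image', image),
                gen_config=GenerationConfig(max_new_tokens=500, min_new_tokens=2))
print(response.text)
```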
3 changes: 3 additions & 0 deletions autotest/tools/pipeline/test_pipeline_chat_pytorch_llm.py
@@ -9,6 +9,7 @@
@pytest.mark.usefixtures('common_case_config')
@pytest.mark.pipeline_chat_pytorch
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize('model', get_torch_model_list(tp_num=1, exclude_dup=True))
def test_pipeline_chat_pytorch_tp1(config, common_case_config, model, worker_id):
@@ -47,6 +48,7 @@ def test_pipeline_chat_pytorch_tp4(config, common_case_config, model, worker_id):
@pytest.mark.usefixtures('common_case_config')
@pytest.mark.pipeline_chat
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize('model', get_torch_model_list(tp_num=1, quant_policy=4, exclude_dup=True))
def test_pipeline_chat_kvint4_tp1(config, common_case_config, model, worker_id):
@@ -85,6 +87,7 @@ def test_pipeline_chat_kvint4_tp4(config, common_case_config, model, worker_id):
@pytest.mark.usefixtures('common_case_config')
@pytest.mark.pipeline_chat
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize('model', get_torch_model_list(tp_num=1, quant_policy=8, exclude_dup=True))
def test_pipeline_chat_kvint8_tp1(config, common_case_config, model, worker_id):
3 changes: 3 additions & 0 deletions autotest/tools/pipeline/test_pipeline_chat_pytorch_mllm.py
@@ -12,6 +12,7 @@
@pytest.mark.pipeline_chat
@pytest.mark.flaky(reruns=0)
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.parametrize('model', get_torch_model_list(tp_num=1, model_type='vl_model'))
def test_pipeline_chat_tp1(config, model, worker_id):
if 'gw' in worker_id:
@@ -47,6 +48,7 @@ def test_pipeline_chat_tp4(config, model, worker_id):
@pytest.mark.pipeline_chat
@pytest.mark.flaky(reruns=0)
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.parametrize('model', get_torch_model_list(tp_num=1, quant_policy=4, model_type='vl_model'))
def test_pipeline_chat_kvint4_tp1(config, model, worker_id):
if 'gw' in worker_id:
@@ -82,6 +84,7 @@ def test_pipeline_chat_kvint4_tp4(config, model, worker_id):
@pytest.mark.pipeline_chat
@pytest.mark.flaky(reruns=0)
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.parametrize('model', get_torch_model_list(tp_num=1, quant_policy=8, model_type='vl_model'))
def test_pipeline_chat_kvint8_tp1(config, model, worker_id):
if 'gw' in worker_id:
3 changes: 3 additions & 0 deletions autotest/tools/pipeline/test_pipeline_chat_turbomind_llm.py
@@ -9,6 +9,7 @@
@pytest.mark.usefixtures('common_case_config')
@pytest.mark.pipeline_chat
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize('model', get_turbomind_model_list(tp_num=1))
@pytest.mark.parametrize('communicator', get_communicator_list())
@@ -50,6 +51,7 @@ def test_pipeline_chat_tp4(config, common_case_config, model, communicator, worker_id):
@pytest.mark.usefixtures('common_case_config')
@pytest.mark.pipeline_chat
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize('model', get_turbomind_model_list(tp_num=1, quant_policy=4))
@pytest.mark.parametrize('communicator', get_communicator_list())
@@ -103,6 +105,7 @@ def test_pipeline_chat_kvint4_tp4(config, common_case_config, model, communicator, worker_id):
@pytest.mark.usefixtures('common_case_config')
@pytest.mark.pipeline_chat
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.flaky(reruns=0)
@pytest.mark.parametrize('model', get_turbomind_model_list(tp_num=1, quant_policy=8))
@pytest.mark.parametrize('communicator', get_communicator_list())
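
The kvint4/kvint8 variants exercise online kv-cache quantization, toggled by `quant_policy` on the engine config (0 = default fp16/bf16 cache, 4 = int4, 8 = int8). A minimal sketch; per the 3090 config, Qwen2.5-7B-Instruct is excluded only from kv-int4, so int8 is a valid choice here:

```python
from lmdeploy import TurbomindEngineConfig, pipeline

backend_config = TurbomindEngineConfig(tp=1, quant_policy=8)  # int8 kv cache
pipe = pipeline('Qwen/Qwen2.5-7B-Instruct', backend_config=backend_config)
print(pipe(['What is 2 + 2?'])[0].text)
```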
3 changes: 3 additions & 0 deletions autotest/tools/pipeline/test_pipeline_chat_turbomind_mllm.py
@@ -12,6 +12,7 @@
@pytest.mark.pipeline_chat
@pytest.mark.flaky(reruns=0)
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.parametrize('model', get_turbomind_model_list(tp_num=1, model_type='vl_model'))
@pytest.mark.parametrize('communicator', get_communicator_list())
def test_pipeline_chat_tp1(config, model, communicator, worker_id):
@@ -53,6 +54,7 @@ def test_pipeline_chat_tp4(config, model, communicator, worker_id):
@pytest.mark.pipeline_chat
@pytest.mark.flaky(reruns=0)
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.parametrize('model', get_turbomind_model_list(tp_num=1, quant_policy=4, model_type='vl_model'))
@pytest.mark.parametrize('communicator', get_communicator_list())
def test_pipeline_chat_kvint4_tp1(config, model, communicator, worker_id):
@@ -100,6 +102,7 @@ def test_pipeline_chat_kvint4_tp4(config, model, communicator, worker_id):
@pytest.mark.pipeline_chat
@pytest.mark.flaky(reruns=0)
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.parametrize('model', get_turbomind_model_list(tp_num=1, quant_policy=8, model_type='vl_model'))
@pytest.mark.parametrize('communicator', get_communicator_list())
def test_pipeline_chat_kvint8_tp1(config, model, communicator, worker_id):
1 change: 1 addition & 0 deletions autotest/tools/quantization/test_quantization_awq.py
@@ -7,6 +7,7 @@


@pytest.mark.order(3)
@pytest.mark.test_3090
@pytest.mark.timeout(900)
@pytest.mark.parametrize('model', get_quantization_model_list('awq'))
def test_quantization_awq(config, model, worker_id):
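
test_quantization_awq produces the 4-bit checkpoints that the awq/4bits runs consume (note the 'w4'/'4bits'/'awq' model-format detection in llm_case.py above). A sketch of the underlying CLI step; `lmdeploy lite auto_awq` is the documented verb, but both paths below are illustrative:

```python
import subprocess

subprocess.run(
    [
        'lmdeploy', 'lite', 'auto_awq',
        '/nvme/qa_test_models/internlm/internlm3-8b-instruct',        # source model (illustrative)
        '--work-dir',
        '/nvme/qa_test_models/autotest_model/internlm3-8b-instruct-4bits',  # output dir (illustrative)
    ],
    check=True,
)
```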
3 changes: 3 additions & 0 deletions autotest/tools/restful/test_restful_chat_hf_pytorch_llm.py
@@ -28,6 +28,7 @@ def getModelList(tp_num):
@pytest.mark.usefixtures('common_case_config')
@pytest.mark.restful_api_pytorch
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.parametrize('prepare_environment', getModelList(tp_num=1), indirect=True)
def test_restful_chat_tp1(config, common_case_config, worker_id):
if get_workerid(worker_id) is None:
@@ -73,6 +74,7 @@ def getKvintModelList(tp_num, quant_policy):
@pytest.mark.usefixtures('common_case_config')
@pytest.mark.restful_api
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.parametrize('prepare_environment', getKvintModelList(tp_num=1, quant_policy=4), indirect=True)
def test_restful_chat_kvint4_tp1(config, common_case_config, worker_id):
if get_workerid(worker_id) is None:
@@ -109,6 +111,7 @@ def test_restful_chat_kvint4_tp4(config, common_case_config, worker_id):
@pytest.mark.usefixtures('common_case_config')
@pytest.mark.restful_api
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.parametrize('prepare_environment', getKvintModelList(tp_num=1, quant_policy=8), indirect=True)
def test_restful_chat_kvint8_tp1(config, common_case_config, worker_id):
if get_workerid(worker_id) is None:
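
The restful tests bring up an api_server through the `prepare_environment` fixture and exercise it over HTTP. A minimal client-side sketch, assuming a server is already listening on an illustrative port:

```python
# Client side only: the suite's fixture is assumed to have started
# `lmdeploy serve api_server` on this (illustrative) port already.
from lmdeploy.serve.openai.api_client import APIClient

api_client = APIClient('http://0.0.0.0:23333')
model_name = api_client.available_models[0]
for output in api_client.chat_completions_v1(
        model=model_name,
        messages=[{'role': 'user', 'content': 'Hello, who are you?'}]):
    print(output)
```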
3 changes: 3 additions & 0 deletions autotest/tools/restful/test_restful_chat_hf_pytorch_mllm.py
@@ -28,6 +28,7 @@ def getModelList(tp_num):
@pytest.mark.order(7)
@pytest.mark.restful_api_vl
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.parametrize('prepare_environment', getModelList(tp_num=1), indirect=True)
def test_restful_chat_tp1(config, worker_id):
if get_workerid(worker_id) is None:
@@ -70,6 +71,7 @@ def getKvintModelList(tp_num, quant_policy: int = None):
@pytest.mark.order(7)
@pytest.mark.restful_api_vl
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.parametrize('prepare_environment', getKvintModelList(tp_num=1, quant_policy=4), indirect=True)
def test_restful_chat_kvint4_tp1(config, worker_id):
if get_workerid(worker_id) is None:
@@ -103,6 +105,7 @@ def test_restful_chat_kvint4_tp4(config, worker_id):
@pytest.mark.order(7)
@pytest.mark.restful_api_vl
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.parametrize('prepare_environment', getKvintModelList(tp_num=1, quant_policy=8), indirect=True)
def test_restful_chat_kvint8_tp1(config, worker_id):
if get_workerid(worker_id) is None:
3 changes: 3 additions & 0 deletions autotest/tools/restful/test_restful_chat_hf_turbomind_llm.py
@@ -33,6 +33,7 @@ def getModelList(tp_num):
@pytest.mark.usefixtures('common_case_config')
@pytest.mark.restful_api
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.parametrize('prepare_environment', getModelList(tp_num=1), indirect=True)
def test_restful_chat_tp1(config, common_case_config, worker_id):
if get_workerid(worker_id) is None:
@@ -81,6 +82,7 @@ def getKvintModelList(tp_num, quant_policy):
@pytest.mark.usefixtures('common_case_config')
@pytest.mark.restful_api
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.parametrize('prepare_environment', getKvintModelList(tp_num=1, quant_policy=4), indirect=True)
def test_restful_chat_kvint4_tp1(config, common_case_config, worker_id):
if get_workerid(worker_id) is None:
@@ -117,6 +119,7 @@ def test_restful_chat_kvint4_tp4(config, common_case_config, worker_id):
@pytest.mark.usefixtures('common_case_config')
@pytest.mark.restful_api
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.parametrize('prepare_environment', getKvintModelList(tp_num=1, quant_policy=8), indirect=True)
def test_restful_chat_kvint8_tp1(config, common_case_config, worker_id):
if get_workerid(worker_id) is None:
3 changes: 3 additions & 0 deletions autotest/tools/restful/test_restful_chat_hf_turbomind_mllm.py
@@ -32,6 +32,7 @@ def getModelList(tp_num):
@pytest.mark.order(7)
@pytest.mark.restful_api_vl
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.parametrize('prepare_environment', getModelList(tp_num=1), indirect=True)
def test_restful_chat_tp1(config, worker_id):
if get_workerid(worker_id) is None:
@@ -77,6 +78,7 @@ def getKvintModelList(tp_num, quant_policy: int = None):
@pytest.mark.order(7)
@pytest.mark.restful_api_vl
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.parametrize('prepare_environment', getKvintModelList(tp_num=1, quant_policy=4), indirect=True)
def test_restful_chat_kvint4_tp1(config, worker_id):
if get_workerid(worker_id) is None:
@@ -110,6 +112,7 @@ def test_restful_chat_kvint4_tp4(config, worker_id):
@pytest.mark.order(7)
@pytest.mark.restful_api_vl
@pytest.mark.gpu_num_1
@pytest.mark.test_3090
@pytest.mark.parametrize('prepare_environment', getKvintModelList(tp_num=1, quant_policy=8), indirect=True)
def test_restful_chat_kvint8_tp1(config, worker_id):
if get_workerid(worker_id) is None: