diff --git a/tests/integration/defs/accuracy/test_cli_flow.py b/tests/integration/defs/accuracy/test_cli_flow.py index e25bc5a3dc3..20258e88baa 100644 --- a/tests/integration/defs/accuracy/test_cli_flow.py +++ b/tests/integration/defs/accuracy/test_cli_flow.py @@ -18,13 +18,19 @@ MedusaDecodingConfig) from tensorrt_llm.quantization import QuantAlgo -from ..conftest import (llm_models_root, parametrize_with_ids, skip_no_nvls, - skip_post_blackwell, skip_pre_ada, skip_pre_blackwell, - skip_pre_hopper) +from ..conftest import (get_sm_version, llm_models_root, parametrize_with_ids, + skip_no_nvls, skip_post_blackwell, skip_pre_ada, + skip_pre_blackwell, skip_pre_hopper) from .accuracy_core import (MMLU, CliFlowAccuracyTestHarness, CnnDailymail, Humaneval, PassKeyRetrieval64k, PassKeyRetrieval128k, SlimPajama6B, ZeroScrolls) +# skip trt flow cases on post-Blackwell-Ultra +if get_sm_version() >= 103: + pytest.skip( + "TRT workflow tests are not supported on post Blackwell-Ultra architecture", + allow_module_level=True) + class TestGpt2(CliFlowAccuracyTestHarness): MODEL_NAME = "gpt2" diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index 8fd7b2b5ffa..42aae50ffbe 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -391,3 +391,10 @@ accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_bf16[multi_gpus_no_cache] S triton_server/test_triton.py::test_llava[llava] SKIP (https://nvbugs/5547414) disaggregated/test_workers.py::test_workers_kv_cache_aware_router[TinyLlama-1.1B-Chat-v1.0] SKIP (https://nvbugs/5607238) unittest/executor/test_rpc_proxy.py SKIP (https://nvbugs/5605741) +full:RTX/accuracy/test_llm_api_pytorch.py::TestGemma3_1BInstruct::test_auto_dtype SKIP (https://nvbugs/5569696) +full:RTX/accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[tp2-trtllm-auto] SKIP (https://nvbugs/5569719) +full:RTX/accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[tp2-trtllm-fp8] SKIP (https://nvbugs/5569719) +full:RTX/accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[ep2-trtllm-auto] SKIP (https://nvbugs/5569719) +full:RTX/accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[ep2-trtllm-fp8] SKIP (https://nvbugs/5569719) +full:RTX/accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[dp2-trtllm-auto] SKIP (https://nvbugs/5569719) +full:RTX/accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_2gpus[dp2-trtllm-fp8] SKIP (https://nvbugs/5569719)