diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 45fd7395b89..881a32c09ef 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,9 +1,1383 @@ +# Files to be formatted by isort, yapf, and autoflake. Other files are formatted by ruff. +# Keep this list in sync with pyproject.toml. +common-files: &common_files | + (?x)^( + .devcontainer/make_env.py | + .github/scripts/label_community_user.py | + .github/scripts/pr_checklist_check.py | + benchmarks/cpp/__init__.py | + benchmarks/cpp/prepare_dataset.py | + benchmarks/cpp/utils/__init__.py | + benchmarks/cpp/utils/convert_nemo_dataset.py | + benchmarks/cpp/utils/generate_rand_loras.py | + benchmarks/cpp/utils/prepare_real_data.py | + benchmarks/cpp/utils/prepare_synthetic_data.py | + benchmarks/cpp/utils/utils.py | + cpp/conanfile.py | + cpp/kernels/fmha_v2/conftest.py | + cpp/kernels/fmha_v2/fmha_test.py | + cpp/kernels/fmha_v2/setup.py | + cpp/kernels/fmha_v2/test/conftest.py | + cpp/kernels/fmha_v2/test/fmha/filter_rules.py | + cpp/kernels/fmha_v2/test/fmha/test_fmha_exe.py | + cpp/kernels/fmha_v2/test/fmha/test_fmhca_exe.py | + cpp/kernels/fmha_v2/test/fmha/test_meta.py | + cpp/kernels/fmha_v2/test/fmha/utils.py | + cpp/kernels/fmha_v2/test/train_ops/test_train_ops.py | + cpp/kernels/fmha_v2/train_ops/fmha_bmark.py | + cpp/kernels/fmha_v2/train_ops/fmha_unit_test.py | + cpp/kernels/fmha_v2/train_ops/my_utils.py | + cpp/kernels/fmha_v2/train_ops/te_mha.py | + cpp/kernels/fmha_v2/train_ops/train_setup.py | + cpp/kernels/xqa/gen_cpp_header.py | + cpp/kernels/xqa/gen_cubins.py | + cpp/kernels/xqa/ref.py | + cpp/libnuma_conan.py | + cpp/micro_benchmarks/gen-moe-benchmark-file.py | + cpp/tensorrt_llm/deep_ep/strip_nvshmem_helper.py | + cpp/tensorrt_llm/kernels/cutlass_kernels/python/generate_kernels.py | + cpp/tensorrt_llm/kernels/decoderMaskedMultiheadAttention/copy_cu.py | + cpp/tests/resources/scripts/build_chatglm_engines.py | + cpp/tests/resources/scripts/build_eagle_engines.py | 
+ cpp/tests/resources/scripts/build_enc_dec_engines.py | + cpp/tests/resources/scripts/build_engines_utils.py | + cpp/tests/resources/scripts/build_gpt_engines.py | + cpp/tests/resources/scripts/build_gptj_engines.py | + cpp/tests/resources/scripts/build_llama_engines.py | + cpp/tests/resources/scripts/build_mamba_engines.py | + cpp/tests/resources/scripts/build_medusa_engines.py | + cpp/tests/resources/scripts/build_recurrentgemma_engines.py | + cpp/tests/resources/scripts/build_redrafter_engines.py | + cpp/tests/resources/scripts/generate_expected_chatglm_output.py | + cpp/tests/resources/scripts/generate_expected_eagle_output.py | + cpp/tests/resources/scripts/generate_expected_enc_dec_output.py | + cpp/tests/resources/scripts/generate_expected_gpt_output.py | + cpp/tests/resources/scripts/generate_expected_gptj_output.py | + cpp/tests/resources/scripts/generate_expected_llama_output.py | + cpp/tests/resources/scripts/generate_expected_mamba_output.py | + cpp/tests/resources/scripts/generate_expected_medusa_output.py | + cpp/tests/resources/scripts/generate_expected_recurrentgemma_output.py | + cpp/tests/resources/scripts/generate_expected_redrafter_output.py | + cpp/tests/resources/scripts/generate_hf_gpt_output.py | + cpp/tests/resources/scripts/generate_test_lora_weights.py | + cpp/tests/resources/scripts/io_converter.py | + docs/source/conf.py | + docs/source/helper.py | + examples/apps/chat.py | + examples/apps/fastapi_server.py | + examples/bindings/executor/example_advanced.py | + examples/bindings/executor/example_basic.py | + examples/bindings/executor/example_debug.py | + examples/bindings/executor/example_logits_processor.py | + examples/disaggregated/clients/disagg_client.py | + examples/disaggregated/slurm/benchmark/gen_server_config.py | + examples/disaggregated/slurm/benchmark/gen_worker_config.py | + examples/disaggregated/slurm/benchmark/submit.py | + examples/dora/normalize_weights.py | + examples/eagle/convert_checkpoint.py | + 
examples/eval_long_context.py | + examples/generate_checkpoint_config.py | + examples/generate_xgrammar_tokenizer_info.py | + examples/hf_lora_convert.py | + examples/infinitebench/args.py | + examples/infinitebench/compute_scores.py | + examples/infinitebench/construct_synthetic_dataset.py | + examples/infinitebench/eval_utils.py | + examples/layer_wise_benchmarks/run_single.py | + examples/llm-api/_tensorrt_engine/llm_eagle_decoding.py | + examples/llm-api/_tensorrt_engine/llm_eagle2_decoding.py | + examples/llm-api/_tensorrt_engine/llm_inference_customize.py | + examples/llm-api/_tensorrt_engine/llm_inference_kv_events.py | + examples/llm-api/_tensorrt_engine/llm_lookahead_decoding.py | + examples/llm-api/_tensorrt_engine/llm_medusa_decoding.py | + examples/llm-api/_tensorrt_engine/llm_quantization.py | + examples/llm-api/_tensorrt_engine/quickstart_example.py | + examples/llm-api/llm_guided_decoding.py | + examples/llm-api/llm_inference_async_streaming.py | + examples/llm-api/llm_inference_async.py | + examples/llm-api/llm_inference_distributed.py | + examples/llm-api/llm_inference.py | + examples/llm-api/llm_kv_cache_connector.py | + examples/llm-api/llm_kv_cache_offloading.py | + examples/llm-api/llm_logits_processor.py | + examples/llm-api/llm_multilora.py | + examples/llm-api/llm_runtime.py | + examples/llm-api/llm_sampling.py | + examples/llm-api/llm_sparse_attention.py | + examples/llm-api/llm_speculative_decoding.py | + examples/llm-api/out_of_tree_example/main.py | + examples/llm-api/out_of_tree_example/modeling_opt.py | + examples/llm-api/quickstart_advanced.py | + examples/llm-api/quickstart_example.py | + examples/llm-api/quickstart_multimodal.py | + examples/llm-api/star_attention.py | + examples/llm-eval/lm-eval-harness/lm_eval_tensorrt_llm.py | + examples/longbench/eval_longbench_v1.py | + examples/longbench/eval_longbench_v2.py | + examples/medusa/convert_checkpoint.py | + examples/mmlu.py | + 
examples/models/contrib/baichuan/convert_checkpoint.py | + examples/models/contrib/bloom/convert_checkpoint.py | + examples/models/contrib/chatglm-6b/tokenization_chatglm.py | + examples/models/contrib/chatglm2-6b/tokenization_chatglm.py | + examples/models/contrib/chatglm3-6b-32k/tokenization_chatglm.py | + examples/models/contrib/cogvlm/convert_checkpoint.py | + examples/models/contrib/dbrx/convert_checkpoint.py | + examples/models/contrib/deepseek_v1/__init__.py | + examples/models/contrib/deepseek_v1/convert_checkpoint.py | + examples/models/contrib/deepseek_v2/convert_checkpoint.py | + examples/models/contrib/dit/convert_checkpoint.py | + examples/models/contrib/dit/diffusion.py | + examples/models/contrib/dit/sample.py | + examples/models/contrib/dit/utils_modelopt.py | + examples/models/contrib/dit/vae_decoder_trt.py | + examples/models/contrib/falcon/convert_checkpoint.py | + examples/models/contrib/gptj/convert_checkpoint.py | + examples/models/contrib/gptneox/convert_checkpoint.py | + examples/models/contrib/grok/convert_checkpoint.py | + examples/models/contrib/mmdit/convert_checkpoint.py | + examples/models/contrib/mmdit/sample.py | + examples/models/contrib/mpt/convert_checkpoint.py | + examples/models/contrib/opt/convert_checkpoint.py | + examples/models/contrib/sdxl/build_sdxl_unet.py | + examples/models/contrib/sdxl/pipeline_stable_diffusion_xl.py | + examples/models/contrib/sdxl/run_sdxl.py | + examples/models/contrib/stdit/aspect.py | + examples/models/contrib/stdit/convert_checkpoint.py | + examples/models/contrib/stdit/pipeline_tllm.py | + examples/models/contrib/stdit/sample.py | + examples/models/contrib/stdit/scheduler.py | + examples/models/contrib/stdit/text_encoder.py | + examples/models/contrib/stdit/utils.py | + examples/models/contrib/stdit/vae.py | + examples/models/contrib/stdit/video_transforms.py | + examples/models/core/bert/__init__.py | + examples/models/core/bert/convert_checkpoint.py | + examples/models/core/bert/run.py | + 
examples/models/core/bert/utils.py | + examples/models/core/commandr/convert_checkpoint.py | + examples/models/core/enc_dec/__init__.py | + examples/models/core/enc_dec/convert_checkpoint.py | + examples/models/core/enc_dec/helper.py | + examples/models/core/enc_dec/run.py | + examples/models/core/gemma/convert_checkpoint.py | + examples/models/core/glm-4-9b/convert_checkpoint.py | + examples/models/core/glm-4-9b/tokenization_chatglm.py | + examples/models/core/gpt_oss/openai_chat_client_function_calling.py | + examples/models/core/gpt/convert_checkpoint.py | + examples/models/core/gpt/merge_ptuning_tables.py | + examples/models/core/gpt/nemo_lora_convert.py | + examples/models/core/gpt/nemo_prompt_convert.py | + examples/models/core/gpt/run_hf.py | + examples/models/core/internlm2/convert_checkpoint.py | + examples/models/core/kimi_k2/kimi_k2_tool_calling_example.py | + examples/models/core/llama/convert_checkpoint.py | + examples/models/core/llama/summarize_long.py | + examples/models/core/mamba/convert_checkpoint.py | + examples/models/core/mllama/convert_checkpoint.py | + examples/models/core/multimodal/__init__.py | + examples/models/core/multimodal/build_multimodal_engine.py | + examples/models/core/multimodal/eval.py | + examples/models/core/multimodal/run.py | + examples/models/core/multimodal/utils.py | + examples/models/core/nemotron_nas/calibration_utils.py | + examples/models/core/nemotron_nas/convert_checkpoint.py | + examples/models/core/phi/convert_checkpoint.py | + examples/models/core/qwen/convert_checkpoint.py | + examples/models/core/qwen2audio/run_chat.py | + examples/models/core/qwen2audio/run.py | + examples/models/core/qwen2audio/utils.py | + examples/models/core/qwenvl/run_chat.py | + examples/models/core/qwenvl/run.py | + examples/models/core/qwenvl/show_pic.py | + examples/models/core/qwenvl/vit_onnx_trt.py | + examples/models/core/recurrentgemma/convert_checkpoint.py | + examples/models/core/vit/convert_checkpoint.py | + 
examples/models/core/whisper/convert_checkpoint.py | + examples/models/core/whisper/distil_whisper/convert_from_distil_whisper.py | + examples/models/core/whisper/run.py | + examples/models/core/whisper/tokenizer.py | + examples/models/core/whisper/whisper_utils.py | + examples/ngram/run_dtm_ngram.py | + examples/openai_triton/manual_plugin/build.py | + examples/openai_triton/manual_plugin/fmha_triton.py | + examples/openai_triton/manual_plugin/plugin.py | + examples/openai_triton/manual_plugin/run.py | + examples/openai_triton/plugin_autogen/build_engine.py | + examples/openai_triton/plugin_autogen/kernel_config.py | + examples/openai_triton/plugin_autogen/run_engine.py | + examples/python_plugin/build_lookup.py | + examples/python_plugin/plugin_lib/__init__.py | + examples/python_plugin/plugin_lib/lookup_kernel.py | + examples/python_plugin/plugin_lib/lookup_plugin.py | + examples/python_plugin/run_lookup.py | + examples/quantization/quantize_mixed_precision_moe.py | + examples/quantization/quantize.py | + examples/ray_orchestrator/llm_inference_async_ray.py | + examples/ray_orchestrator/llm_inference_distributed_ray.py | + examples/redrafter/convert_checkpoint.py | + examples/run.py | + examples/scaffolding/contrib/AsyncGeneration/stream_generation_controller.py | + examples/scaffolding/contrib/AsyncGeneration/stream_generation_run.py | + examples/scaffolding/contrib/DeepConf/run_generation.py | + examples/scaffolding/contrib/Dynasor/scaffolding_dynasor_run.py | + examples/scaffolding/contrib/mcp/e2b/e2bserver.py | + examples/scaffolding/contrib/mcp/e2b/main.py | + examples/scaffolding/contrib/mcp/mcptest.py | + examples/scaffolding/contrib/mcp/weather/weather.py | + examples/scaffolding/contrib/mcp/websearch/main.py | + examples/scaffolding/contrib/mcp/websearch/websearch.py | + examples/scaffolding/contrib/TreeInference/run_mcts_example.py | + examples/scaffolding/contrib/TreeInference/run_tot_example.py | + examples/scaffolding/run_basic_generation.py | + 
examples/scaffolding/run_best_of_n_with_reward.py | + examples/scaffolding/run_majority_vote_aime24.py | + examples/scaffolding/token_budget_majority_vote.py | + examples/serve/openai_chat_client_for_multimodal.py | + examples/serve/openai_chat_client.py | + examples/serve/openai_completion_client_for_lora.py | + examples/serve/openai_completion_client_json_schema.py | + examples/serve/openai_completion_client.py | + examples/summarize.py | + examples/utils.py | + examples/wide_ep/ep_load_balancer/generate_eplb_config.py | + examples/wide_ep/ep_load_balancer/report_load_statistics.py | + examples/wide_ep/ep_load_balancer/utils.py | + examples/wide_ep/slurm_scripts/process_gen_iterlog.py | + jenkins/scripts/mergeWaiveList.py | + jenkins/scripts/open_search_db.py | + jenkins/scripts/test_rerun.py | + scripts/build_cpp_examples.py | + scripts/build_wheel.py | + scripts/check_test_list.py | + scripts/dco_check.py | + scripts/format_test_list.py | + scripts/generate_duration.py | + scripts/generate_lock_file.py | + scripts/get_wheel_from_package.py | + scripts/git_replace.py | + scripts/package_trt_llm.py | + scripts/release_check.py | + scripts/rename_docker_images.py | + scripts/test_to_stage_mapping.py | + setup.py | + tensorrt_llm/__init__.py | + tensorrt_llm/_ray_utils.py | + tensorrt_llm/_tensorrt_engine/__init__.py | + tensorrt_llm/_torch/__init__.py | + tensorrt_llm/_torch/attention_backend/__init__.py | + tensorrt_llm/_torch/attention_backend/flashinfer.py | + tensorrt_llm/_torch/attention_backend/interface.py | + tensorrt_llm/_torch/attention_backend/sparse/__init__.py | + tensorrt_llm/_torch/attention_backend/sparse/dsa.py | + tensorrt_llm/_torch/attention_backend/sparse/kernel.py | + tensorrt_llm/_torch/attention_backend/sparse/rocket.py | + tensorrt_llm/_torch/attention_backend/sparse/utils.py | + tensorrt_llm/_torch/attention_backend/star_flashinfer.py | + tensorrt_llm/_torch/attention_backend/trtllm.py | + tensorrt_llm/_torch/attention_backend/utils.py | 
+ tensorrt_llm/_torch/attention_backend/vanilla.py | + tensorrt_llm/_torch/autotuner.py | + tensorrt_llm/_torch/compilation/__init__.py | + tensorrt_llm/_torch/compilation/backend.py | + tensorrt_llm/_torch/compilation/multi_stream/__init__.py | + tensorrt_llm/_torch/compilation/multi_stream/auto_multi_stream.py | + tensorrt_llm/_torch/compilation/patterns/__init__.py | + tensorrt_llm/_torch/compilation/patterns/ar_residual_norm.py | + tensorrt_llm/_torch/compilation/patterns/residual_add_norm.py | + tensorrt_llm/_torch/compilation/piecewise_optimizer.py | + tensorrt_llm/_torch/compilation/recover_pass.py | + tensorrt_llm/_torch/compilation/remove_copy_pass.py | + tensorrt_llm/_torch/compilation/utils.py | + tensorrt_llm/_torch/configs/deepseek_v3.py | + tensorrt_llm/_torch/cublaslt_utils.py | + tensorrt_llm/_torch/custom_ops/__init__.py | + tensorrt_llm/_torch/custom_ops/cpp_custom_ops.py | + tensorrt_llm/_torch/custom_ops/cute_dsl_custom_ops.py | + tensorrt_llm/_torch/custom_ops/flashinfer_custom_ops.py | + tensorrt_llm/_torch/custom_ops/torch_custom_ops.py | + tensorrt_llm/_torch/custom_ops/trtllm_gen_custom_ops.py | + tensorrt_llm/_torch/custom_ops/userbuffers_custom_ops.py | + tensorrt_llm/_torch/cute_dsl_kernels/__init__.py | + tensorrt_llm/_torch/cute_dsl_kernels/blackwell/__init__.py | + tensorrt_llm/_torch/cute_dsl_kernels/blackwell/custom_pipeline.py | + tensorrt_llm/_torch/cute_dsl_kernels/blackwell/dense_blockscaled_gemm_persistent.py | + tensorrt_llm/_torch/cute_dsl_kernels/blackwell/utils.py | + tensorrt_llm/_torch/cute_dsl_utils.py | + tensorrt_llm/_torch/debug/__init__.py | + tensorrt_llm/_torch/debug/debug_hook.py | + tensorrt_llm/_torch/device_mesh.py | + tensorrt_llm/_torch/distributed/__init__.py | + tensorrt_llm/_torch/distributed/communicator.py | + tensorrt_llm/_torch/distributed/moe_alltoall.py | + tensorrt_llm/_torch/distributed/ops.py | + tensorrt_llm/_torch/distributed/pg_utils.py | + tensorrt_llm/_torch/expert_statistic.py | + 
tensorrt_llm/_torch/flashinfer_utils.py | + tensorrt_llm/_torch/hostfunc.py | + tensorrt_llm/_torch/llm.py | + tensorrt_llm/_torch/memory_buffer_utils.py | + tensorrt_llm/_torch/metadata.py | + tensorrt_llm/_torch/model_config.py | + tensorrt_llm/_torch/models/__init__.py | + tensorrt_llm/_torch/models/checkpoints/__init__.py | + tensorrt_llm/_torch/models/checkpoints/auto_mapper.py | + tensorrt_llm/_torch/models/checkpoints/base_checkpoint_loader.py | + tensorrt_llm/_torch/models/checkpoints/base_config_loader.py | + tensorrt_llm/_torch/models/checkpoints/base_weight_loader.py | + tensorrt_llm/_torch/models/checkpoints/base_weight_mapper.py | + tensorrt_llm/_torch/models/checkpoints/hf/__init__.py | + tensorrt_llm/_torch/models/checkpoints/hf/checkpoint_loader.py | + tensorrt_llm/_torch/models/checkpoints/hf/config_loader.py | + tensorrt_llm/_torch/models/checkpoints/hf/gemma3_weight_mapper.py | + tensorrt_llm/_torch/models/checkpoints/hf/llama4_weight_mapper.py | + tensorrt_llm/_torch/models/checkpoints/hf/mixtral_weight_mapper.py | + tensorrt_llm/_torch/models/checkpoints/hf/nemotron_h_weight_mapper.py | + tensorrt_llm/_torch/models/checkpoints/hf/qwen2_moe_weight_mapper.py | + tensorrt_llm/_torch/models/checkpoints/hf/qwen2vl_weight_mapper.py | + tensorrt_llm/_torch/models/checkpoints/hf/qwen3_moe_weight_mapper.py | + tensorrt_llm/_torch/models/checkpoints/hf/qwen3_next_weight_mapper.py | + tensorrt_llm/_torch/models/checkpoints/hf/weight_loader.py | + tensorrt_llm/_torch/models/checkpoints/hf/weight_mapper.py | + tensorrt_llm/_torch/models/modeling_auto.py | + tensorrt_llm/_torch/models/modeling_bert.py | + tensorrt_llm/_torch/models/modeling_clip.py | + tensorrt_llm/_torch/models/modeling_deepseekv3.py | + tensorrt_llm/_torch/models/modeling_exaone4.py | + tensorrt_llm/_torch/models/modeling_gemma3.py | + tensorrt_llm/_torch/models/modeling_gemma3vl.py | + tensorrt_llm/_torch/models/modeling_gpt_oss.py | + tensorrt_llm/_torch/models/modeling_hunyuan_dense.py 
| + tensorrt_llm/_torch/models/modeling_hunyuan_moe.py | + tensorrt_llm/_torch/models/modeling_hyperclovax.py | + tensorrt_llm/_torch/models/modeling_llama_min_latency.py | + tensorrt_llm/_torch/models/modeling_llama.py | + tensorrt_llm/_torch/models/modeling_llava_next.py | + tensorrt_llm/_torch/models/modeling_mistral.py | + tensorrt_llm/_torch/models/modeling_mixtral.py | + tensorrt_llm/_torch/models/modeling_mllama.py | + tensorrt_llm/_torch/models/modeling_multimodal_encoder.py | + tensorrt_llm/_torch/models/modeling_multimodal_utils.py | + tensorrt_llm/_torch/models/modeling_nanov2vlm.py | + tensorrt_llm/_torch/models/modeling_nemotron_h.py | + tensorrt_llm/_torch/models/modeling_nemotron_nas.py | + tensorrt_llm/_torch/models/modeling_nemotron.py | + tensorrt_llm/_torch/models/modeling_phi3.py | + tensorrt_llm/_torch/models/modeling_phi4mm.py | + tensorrt_llm/_torch/models/modeling_qwen_moe.py | + tensorrt_llm/_torch/models/modeling_qwen.py | + tensorrt_llm/_torch/models/modeling_qwen2vl.py | + tensorrt_llm/_torch/models/modeling_qwen3_moe.py | + tensorrt_llm/_torch/models/modeling_qwen3_next.py | + tensorrt_llm/_torch/models/modeling_qwen3.py | + tensorrt_llm/_torch/models/modeling_radio.py | + tensorrt_llm/_torch/models/modeling_seedoss.py | + tensorrt_llm/_torch/models/modeling_siglip.py | + tensorrt_llm/_torch/models/modeling_speculative.py | + tensorrt_llm/_torch/models/modeling_utils.py | + tensorrt_llm/_torch/models/modeling_vila.py | + tensorrt_llm/_torch/modules/__init__.py | + tensorrt_llm/_torch/modules/attention.py | + tensorrt_llm/_torch/modules/decoder_layer.py | + tensorrt_llm/_torch/modules/embedding.py | + tensorrt_llm/_torch/modules/fla/__init__.py | + tensorrt_llm/_torch/modules/fla/chunk_delta_h.py | + tensorrt_llm/_torch/modules/fla/chunk_o.py | + tensorrt_llm/_torch/modules/fla/chunk_scaled_dot_kkt.py | + tensorrt_llm/_torch/modules/fla/chunk.py | + tensorrt_llm/_torch/modules/fla/cumsum.py | + 
tensorrt_llm/_torch/modules/fla/fused_recurrent.py | + tensorrt_llm/_torch/modules/fla/fused_sigmoid_gating_recurrent.py | + tensorrt_llm/_torch/modules/fla/index.py | + tensorrt_llm/_torch/modules/fla/l2norm.py | + tensorrt_llm/_torch/modules/fla/layernorm_gated.py | + tensorrt_llm/_torch/modules/fla/op.py | + tensorrt_llm/_torch/modules/fla/solve_tril.py | + tensorrt_llm/_torch/modules/fla/utils.py | + tensorrt_llm/_torch/modules/fla/wy_fast.py | + tensorrt_llm/_torch/modules/fused_moe/__init__.py | + tensorrt_llm/_torch/modules/fused_moe/create_moe.py | + tensorrt_llm/_torch/modules/fused_moe/deep_ep_utils.py | + tensorrt_llm/_torch/modules/fused_moe/fused_moe_cute_dsl.py | + tensorrt_llm/_torch/modules/fused_moe/fused_moe_cutlass.py | + tensorrt_llm/_torch/modules/fused_moe/fused_moe_deepgemm.py | + tensorrt_llm/_torch/modules/fused_moe/fused_moe_triton.py | + tensorrt_llm/_torch/modules/fused_moe/fused_moe_trtllm_gen.py | + tensorrt_llm/_torch/modules/fused_moe/fused_moe_vanilla.py | + tensorrt_llm/_torch/modules/fused_moe/fused_moe_wide_ep.py | + tensorrt_llm/_torch/modules/fused_moe/interface.py | + tensorrt_llm/_torch/modules/fused_moe/moe_load_balancer.py | + tensorrt_llm/_torch/modules/fused_moe/ops/__init__.py | + tensorrt_llm/_torch/modules/fused_moe/ops/moe_op_cutlass.py | + tensorrt_llm/_torch/modules/fused_moe/ops/moe_op_deepgemm.py | + tensorrt_llm/_torch/modules/fused_moe/ops/moe_op.py | + tensorrt_llm/_torch/modules/fused_moe/quantization.py | + tensorrt_llm/_torch/modules/fused_moe/routing.py | + tensorrt_llm/_torch/modules/gated_mlp.py | + tensorrt_llm/_torch/modules/layer_norm.py | + tensorrt_llm/_torch/modules/linear.py | + tensorrt_llm/_torch/modules/logits_processor.py | + tensorrt_llm/_torch/modules/mamba/__init__.py | + tensorrt_llm/_torch/modules/mamba/causal_conv1d.py | + tensorrt_llm/_torch/modules/mamba/layernorm_gated.py | + tensorrt_llm/_torch/modules/mamba/mamba2_metadata.py | + tensorrt_llm/_torch/modules/mamba/mamba2_mixer.py | + 
tensorrt_llm/_torch/modules/mamba/selective_state_update.py | + tensorrt_llm/_torch/modules/mamba/softplus.py | + tensorrt_llm/_torch/modules/mamba/ssd_bmm.py | + tensorrt_llm/_torch/modules/mamba/ssd_chunk_scan.py | + tensorrt_llm/_torch/modules/mamba/ssd_chunk_state.py | + tensorrt_llm/_torch/modules/mamba/ssd_combined.py | + tensorrt_llm/_torch/modules/mamba/ssd_state_passing.py | + tensorrt_llm/_torch/modules/mlp.py | + tensorrt_llm/_torch/modules/multi_stream_utils.py | + tensorrt_llm/_torch/modules/qk_norm_attention.py | + tensorrt_llm/_torch/modules/rms_norm.py | + tensorrt_llm/_torch/modules/rotary_embedding.py | + tensorrt_llm/_torch/modules/swiglu.py | + tensorrt_llm/_torch/modules/triton_linear.py | + tensorrt_llm/_torch/peft/__init__.py | + tensorrt_llm/_torch/peft/lora/__init__.py | + tensorrt_llm/_torch/peft/lora/layer.py | + tensorrt_llm/_torch/pyexecutor/__init__.py | + tensorrt_llm/_torch/pyexecutor/_util.py | + tensorrt_llm/_torch/pyexecutor/config_utils.py | + tensorrt_llm/_torch/pyexecutor/config.py | + tensorrt_llm/_torch/pyexecutor/cuda_graph_runner.py | + tensorrt_llm/_torch/pyexecutor/executor_request_queue.py | + tensorrt_llm/_torch/pyexecutor/finish_reason.py | + tensorrt_llm/_torch/pyexecutor/grammar_matcher.py | + tensorrt_llm/_torch/pyexecutor/guided_decoder.py | + tensorrt_llm/_torch/pyexecutor/handle_additional_outputs.py | + tensorrt_llm/_torch/pyexecutor/handle_logits.py | + tensorrt_llm/_torch/pyexecutor/kv_cache_connector.py | + tensorrt_llm/_torch/pyexecutor/kv_cache_transceiver.py | + tensorrt_llm/_torch/pyexecutor/layerwise_nvtx_marker.py | + tensorrt_llm/_torch/pyexecutor/llm_request.py | + tensorrt_llm/_torch/pyexecutor/make_decoding_batch_input_output.py | + tensorrt_llm/_torch/pyexecutor/mamba_cache_manager.py | + tensorrt_llm/_torch/pyexecutor/model_engine.py | + tensorrt_llm/_torch/pyexecutor/model_loader.py | + tensorrt_llm/_torch/pyexecutor/py_executor_creator.py | + tensorrt_llm/_torch/pyexecutor/py_executor.py | + 
tensorrt_llm/_torch/pyexecutor/resource_manager.py | + tensorrt_llm/_torch/pyexecutor/scheduler.py | + tensorrt_llm/_torch/pyexecutor/seq_slot_manager.py | + tensorrt_llm/_torch/shared_tensor/__init__.py | + tensorrt_llm/_torch/shared_tensor/shared_tensor.py | + tensorrt_llm/_torch/speculative/__init__.py | + tensorrt_llm/_torch/speculative/auto_heuristic.py | + tensorrt_llm/_torch/speculative/drafter.py | + tensorrt_llm/_torch/speculative/drafting_loops.py | + tensorrt_llm/_torch/speculative/eagle3.py | + tensorrt_llm/_torch/speculative/interface.py | + tensorrt_llm/_torch/speculative/model_drafter.py | + tensorrt_llm/_torch/speculative/mtp.py | + tensorrt_llm/_torch/speculative/ngram.py | + tensorrt_llm/_torch/speculative/save_hidden_state.py | + tensorrt_llm/_torch/speculative/spec_tree_manager.py | + tensorrt_llm/_torch/speculative/speculation_gate.py | + tensorrt_llm/_torch/speculative/utils.py | + tensorrt_llm/_torch/utils.py | + tensorrt_llm/_torch/virtual_memory.py | + tensorrt_llm/_utils.py | + tensorrt_llm/bench/__init__.py | + tensorrt_llm/bench/benchmark/__init__.py | + tensorrt_llm/bench/benchmark/low_latency.py | + tensorrt_llm/bench/benchmark/throughput.py | + tensorrt_llm/bench/benchmark/utils/__init__.py | + tensorrt_llm/bench/benchmark/utils/asynchronous.py | + tensorrt_llm/bench/benchmark/utils/general.py | + tensorrt_llm/bench/benchmark/utils/processes.py | + tensorrt_llm/bench/build/__init__.py | + tensorrt_llm/bench/build/build.py | + tensorrt_llm/bench/build/dataclasses.py | + tensorrt_llm/bench/build/tuning.py | + tensorrt_llm/bench/build/utils.py | + tensorrt_llm/bench/dataclasses/__init__.py | + tensorrt_llm/bench/dataclasses/configuration.py | + tensorrt_llm/bench/dataclasses/engine.py | + tensorrt_llm/bench/dataclasses/enums.py | + tensorrt_llm/bench/dataclasses/general.py | + tensorrt_llm/bench/dataclasses/reporting.py | + tensorrt_llm/bench/dataclasses/statistics.py | + tensorrt_llm/bench/utils/__init__.py | + 
tensorrt_llm/bench/utils/data.py | + tensorrt_llm/builder.py | + tensorrt_llm/commands/__init__.py | + tensorrt_llm/commands/bench.py | + tensorrt_llm/commands/build.py | + tensorrt_llm/commands/eval.py | + tensorrt_llm/commands/prune.py | + tensorrt_llm/commands/refit.py | + tensorrt_llm/commands/serve.py | + tensorrt_llm/evaluate/__init__.py | + tensorrt_llm/evaluate/cnn_dailymail.py | + tensorrt_llm/evaluate/interface.py | + tensorrt_llm/evaluate/json_mode_eval.py | + tensorrt_llm/evaluate/lm_eval_tasks/gpqa/cot_zeroshot_aa/_generate_configs.py | + tensorrt_llm/evaluate/lm_eval_tasks/gpqa/cot_zeroshot_aa/utils.py | + tensorrt_llm/evaluate/lm_eval.py | + tensorrt_llm/evaluate/longbench_v2.py | + tensorrt_llm/evaluate/mmlu.py | + tensorrt_llm/executor/__init__.py | + tensorrt_llm/executor/base_worker.py | + tensorrt_llm/executor/executor.py | + tensorrt_llm/executor/ipc.py | + tensorrt_llm/executor/postproc_worker.py | + tensorrt_llm/executor/proxy.py | + tensorrt_llm/executor/ray_executor.py | + tensorrt_llm/executor/ray_gpu_worker.py | + tensorrt_llm/executor/request.py | + tensorrt_llm/executor/result.py | + tensorrt_llm/executor/rpc_proxy.py | + tensorrt_llm/executor/rpc_worker.py | + tensorrt_llm/executor/rpc/__init__.py | + tensorrt_llm/executor/rpc/rpc_client.py | + tensorrt_llm/executor/rpc/rpc_common.py | + tensorrt_llm/executor/rpc/rpc_server.py | + tensorrt_llm/executor/utils.py | + tensorrt_llm/executor/worker.py | + tensorrt_llm/functional.py | + tensorrt_llm/inputs/__init__.py | + tensorrt_llm/inputs/data.py | + tensorrt_llm/inputs/evs.py | + tensorrt_llm/inputs/multimodal.py | + tensorrt_llm/inputs/registry.py | + tensorrt_llm/inputs/utils.py | + tensorrt_llm/layers/__init__.py | + tensorrt_llm/layers/activation.py | + tensorrt_llm/layers/attention.py | + tensorrt_llm/layers/cast.py | + tensorrt_llm/layers/conv.py | + tensorrt_llm/layers/embedding.py | + tensorrt_llm/layers/language_adapter.py | + tensorrt_llm/layers/linear.py | + 
tensorrt_llm/layers/lora.py | + tensorrt_llm/layers/mlp.py | + tensorrt_llm/layers/moe.py | + tensorrt_llm/layers/normalization.py | + tensorrt_llm/layers/pooling.py | + tensorrt_llm/layers/recurrent.py | + tensorrt_llm/layers/ssm.py | + tensorrt_llm/llmapi/__init__.py | + tensorrt_llm/llmapi/build_cache.py | + tensorrt_llm/llmapi/disagg_utils.py | + tensorrt_llm/llmapi/kv_cache_type.py | + tensorrt_llm/llmapi/llm_args.py | + tensorrt_llm/llmapi/llm_utils.py | + tensorrt_llm/llmapi/llm.py | + tensorrt_llm/llmapi/mgmn_leader_node.py | + tensorrt_llm/llmapi/mgmn_worker_node.py | + tensorrt_llm/llmapi/mm_encoder.py | + tensorrt_llm/llmapi/mpi_session.py | + tensorrt_llm/llmapi/reasoning_parser.py | + tensorrt_llm/llmapi/tokenizer.py | + tensorrt_llm/llmapi/tracer.py | + tensorrt_llm/llmapi/tracing.py | + tensorrt_llm/llmapi/utils.py | + tensorrt_llm/lora_helper.py | + tensorrt_llm/mapping.py | + tensorrt_llm/math_utils.py | + tensorrt_llm/metrics/__init__.py | + tensorrt_llm/metrics/collector.py | + tensorrt_llm/metrics/enums.py | + tensorrt_llm/models/__init__.py | + tensorrt_llm/models/automodel.py | + tensorrt_llm/models/baichuan/__init__.py | + tensorrt_llm/models/baichuan/config.py | + tensorrt_llm/models/baichuan/convert.py | + tensorrt_llm/models/baichuan/model.py | + tensorrt_llm/models/bert/__init__.py | + tensorrt_llm/models/bert/config.py | + tensorrt_llm/models/bert/convert.py | + tensorrt_llm/models/bert/model.py | + tensorrt_llm/models/bloom/__init__.py | + tensorrt_llm/models/bloom/model.py | + tensorrt_llm/models/chatglm/__init__.py | + tensorrt_llm/models/chatglm/config.py | + tensorrt_llm/models/chatglm/convert.py | + tensorrt_llm/models/chatglm/model.py | + tensorrt_llm/models/clip/__init__.py | + tensorrt_llm/models/clip/model.py | + tensorrt_llm/models/cogvlm/__init__.py | + tensorrt_llm/models/cogvlm/config.py | + tensorrt_llm/models/cogvlm/convert.py | + tensorrt_llm/models/cogvlm/model.py | + tensorrt_llm/models/commandr/__init__.py | + 
tensorrt_llm/models/commandr/config.py | + tensorrt_llm/models/commandr/model.py | + tensorrt_llm/models/convert_utils.py | + tensorrt_llm/models/dbrx/__init__.py | + tensorrt_llm/models/dbrx/config.py | + tensorrt_llm/models/dbrx/model.py | + tensorrt_llm/models/deepseek_v1/__init__.py | + tensorrt_llm/models/deepseek_v1/config.py | + tensorrt_llm/models/deepseek_v1/convert.py | + tensorrt_llm/models/deepseek_v1/model.py | + tensorrt_llm/models/deepseek_v2/__init__.py | + tensorrt_llm/models/deepseek_v2/config.py | + tensorrt_llm/models/deepseek_v2/convert.py | + tensorrt_llm/models/deepseek_v2/model.py | + tensorrt_llm/models/dit/__init__.py | + tensorrt_llm/models/dit/model.py | + tensorrt_llm/models/eagle/__init__.py | + tensorrt_llm/models/eagle/config.py | + tensorrt_llm/models/eagle/model.py | + tensorrt_llm/models/enc_dec/__init__.py | + tensorrt_llm/models/enc_dec/model.py | + tensorrt_llm/models/falcon/__init__.py | + tensorrt_llm/models/falcon/config.py | + tensorrt_llm/models/falcon/convert.py | + tensorrt_llm/models/falcon/model.py | + tensorrt_llm/models/gemma/__init__.py | + tensorrt_llm/models/gemma/config.py | + tensorrt_llm/models/gemma/convert.py | + tensorrt_llm/models/gemma/model.py | + tensorrt_llm/models/gemma/smoothquant.py | + tensorrt_llm/models/gemma/utils/__init__.py | + tensorrt_llm/models/gemma/utils/layers.py | + tensorrt_llm/models/gemma/utils/modules.py | + tensorrt_llm/models/gemma/utils/params.py | + tensorrt_llm/models/gemma/utils/positional_embeddings.py | + tensorrt_llm/models/gemma/utils/sampler.py | + tensorrt_llm/models/gemma/utils/transformer.py | + tensorrt_llm/models/gemma/weight.py | + tensorrt_llm/models/generation_mixin.py | + tensorrt_llm/models/gpt/__init__.py | + tensorrt_llm/models/gpt/config.py | + tensorrt_llm/models/gpt/convert.py | + tensorrt_llm/models/gpt/model.py | + tensorrt_llm/models/gptj/__init__.py | + tensorrt_llm/models/gptj/config.py | + tensorrt_llm/models/gptj/convert.py | + 
tensorrt_llm/models/gptj/model.py | + tensorrt_llm/models/gptneox/__init__.py | + tensorrt_llm/models/gptneox/model.py | + tensorrt_llm/models/grok/__init__.py | + tensorrt_llm/models/grok/convert.py | + tensorrt_llm/models/grok/model.py | + tensorrt_llm/models/grok/weight.py | + tensorrt_llm/models/llama/__init__.py | + tensorrt_llm/models/llama/config.py | + tensorrt_llm/models/llama/convert.py | + tensorrt_llm/models/llama/model.py | + tensorrt_llm/models/mamba/__init__.py | + tensorrt_llm/models/mamba/config.py | + tensorrt_llm/models/mamba/convert.py | + tensorrt_llm/models/mamba/model.py | + tensorrt_llm/models/medusa/__init__.py | + tensorrt_llm/models/medusa/config.py | + tensorrt_llm/models/medusa/model.py | + tensorrt_llm/models/medusa/weight.py | + tensorrt_llm/models/mllama/__init__.py | + tensorrt_llm/models/mllama/config.py | + tensorrt_llm/models/mllama/model.py | + tensorrt_llm/models/mmdit_sd3/__init__.py | + tensorrt_llm/models/mmdit_sd3/config.py | + tensorrt_llm/models/mmdit_sd3/model.py | + tensorrt_llm/models/model_weights_loader.py | + tensorrt_llm/models/modeling_utils.py | + tensorrt_llm/models/mpt/__init__.py | + tensorrt_llm/models/mpt/model.py | + tensorrt_llm/models/multimodal_encoders/__init__.py | + tensorrt_llm/models/multimodal_encoders/config.py | + tensorrt_llm/models/multimodal_encoders/model.py | + tensorrt_llm/models/nemotron_nas/__init__.py | + tensorrt_llm/models/nemotron_nas/config.py | + tensorrt_llm/models/nemotron_nas/convert.py | + tensorrt_llm/models/nemotron_nas/layer_config.py | + tensorrt_llm/models/nemotron_nas/model.py | + tensorrt_llm/models/opt/__init__.py | + tensorrt_llm/models/opt/model.py | + tensorrt_llm/models/phi/__init__.py | + tensorrt_llm/models/phi/config.py | + tensorrt_llm/models/phi/convert.py | + tensorrt_llm/models/phi/model.py | + tensorrt_llm/models/phi3/__init__.py | + tensorrt_llm/models/phi3/config.py | + tensorrt_llm/models/phi3/convert.py | + tensorrt_llm/models/phi3/model.py | + 
tensorrt_llm/models/phi3/split_weights.py | + tensorrt_llm/models/qwen/__init__.py | + tensorrt_llm/models/qwen/config.py | + tensorrt_llm/models/qwen/convert.py | + tensorrt_llm/models/qwen/model.py | + tensorrt_llm/models/qwen/utils.py | + tensorrt_llm/models/recurrentgemma/__init__.py | + tensorrt_llm/models/recurrentgemma/model.py | + tensorrt_llm/models/redrafter/__init__.py | + tensorrt_llm/models/redrafter/drafter.py | + tensorrt_llm/models/redrafter/model.py | + tensorrt_llm/models/redrafter/redrafter_helper.py | + tensorrt_llm/models/stdit/__init__.py | + tensorrt_llm/models/stdit/config.py | + tensorrt_llm/models/stdit/model.py | + tensorrt_llm/models/unet/__init__.py | + tensorrt_llm/models/unet/attention.py | + tensorrt_llm/models/unet/embeddings.py | + tensorrt_llm/models/unet/pp/__init__.py | + tensorrt_llm/models/unet/pp/attention.py | + tensorrt_llm/models/unet/pp/conv2d.py | + tensorrt_llm/models/unet/pp/groupnorm.py | + tensorrt_llm/models/unet/pp/unet_pp.py | + tensorrt_llm/models/unet/resnet.py | + tensorrt_llm/models/unet/unet_2d_blocks.py | + tensorrt_llm/models/unet/unet_2d_condition.py | + tensorrt_llm/models/unet/weights.py | + tensorrt_llm/network.py | + tensorrt_llm/parameter.py | + tensorrt_llm/plugin/__init__.py | + tensorrt_llm/plugin/plugin.py | + tensorrt_llm/quantization/__init__.py | + tensorrt_llm/quantization/functional.py | + tensorrt_llm/quantization/image_processing.py | + tensorrt_llm/quantization/layers.py | + tensorrt_llm/quantization/mode.py | + tensorrt_llm/quantization/quantize_by_modelopt.py | + tensorrt_llm/quantization/quantize.py | + tensorrt_llm/quantization/utils/__init__.py | + tensorrt_llm/quantization/utils/fp4_utils.py | + tensorrt_llm/quantization/utils/fp8_utils.py | + tensorrt_llm/ray_stub.py | + tensorrt_llm/runtime/__init__.py | + tensorrt_llm/runtime/enc_dec_model_runner.py | + tensorrt_llm/runtime/generation.py | + tensorrt_llm/runtime/kv_cache_manager.py | + tensorrt_llm/runtime/medusa_utils.py | + 
tensorrt_llm/runtime/memory_pools/__init__.py | + tensorrt_llm/runtime/memory_pools/memory_pools_allocator.py | + tensorrt_llm/runtime/memory_pools/pool.py | + tensorrt_llm/runtime/memory_pools/pools_kv_cache_manager.py | + tensorrt_llm/runtime/model_runner_cpp.py | + tensorrt_llm/runtime/model_runner.py | + tensorrt_llm/runtime/multimodal_model_runner.py | + tensorrt_llm/runtime/processor_wrapper/__init__.py | + tensorrt_llm/runtime/processor_wrapper/mllama_processor_wrapper.py | + tensorrt_llm/runtime/processor_wrapper/processor_wrapper.py | + tensorrt_llm/runtime/redrafter_utils.py | + tensorrt_llm/runtime/session.py | + tensorrt_llm/scaffolding/__init__.py | + tensorrt_llm/scaffolding/benchmark.py | + tensorrt_llm/scaffolding/contrib/__init__.py | + tensorrt_llm/scaffolding/contrib/AsyncGeneration/__init__.py | + tensorrt_llm/scaffolding/contrib/AsyncGeneration/stream_generation.py | + tensorrt_llm/scaffolding/contrib/DeepConf/__init__.py | + tensorrt_llm/scaffolding/contrib/DeepConf/deep_conf_controller.py | + tensorrt_llm/scaffolding/contrib/DeepConf/deep_conf_utils.py | + tensorrt_llm/scaffolding/contrib/Dynasor/__init__.py | + tensorrt_llm/scaffolding/contrib/Dynasor/dynasor_controller.py | + tensorrt_llm/scaffolding/contrib/Dynasor/evaluator.py | + tensorrt_llm/scaffolding/contrib/mcp/__init__.py | + tensorrt_llm/scaffolding/contrib/mcp/chat_handler.py | + tensorrt_llm/scaffolding/contrib/mcp/chat_task.py | + tensorrt_llm/scaffolding/contrib/mcp/mcp_controller.py | + tensorrt_llm/scaffolding/contrib/mcp/mcp_task.py | + tensorrt_llm/scaffolding/contrib/mcp/mcp_utils.py | + tensorrt_llm/scaffolding/contrib/mcp/mcp_worker.py | + tensorrt_llm/scaffolding/contrib/TreeInference/__init__.py | + tensorrt_llm/scaffolding/contrib/TreeInference/tree_controllers.py | + tensorrt_llm/scaffolding/controller.py | + tensorrt_llm/scaffolding/math_utils.py | + tensorrt_llm/scaffolding/result.py | + tensorrt_llm/scaffolding/scaffolding_llm.py | + 
tensorrt_llm/scaffolding/task_collection.py | + tensorrt_llm/scaffolding/task.py | + tensorrt_llm/scaffolding/worker.py | + tensorrt_llm/scheduling_params.py | + tensorrt_llm/serialization.py | + tensorrt_llm/serve/__init__.py | + tensorrt_llm/serve/chat_utils.py | + tensorrt_llm/serve/cluster_storage.py | + tensorrt_llm/serve/disagg_auto_scaling.py | + tensorrt_llm/serve/harmony_adapter.py | + tensorrt_llm/serve/metadata_server.py | + tensorrt_llm/serve/openai_disagg_server.py | + tensorrt_llm/serve/openai_protocol.py | + tensorrt_llm/serve/openai_server.py | + tensorrt_llm/serve/postprocess_handlers.py | + tensorrt_llm/serve/responses_utils.py | + tensorrt_llm/serve/router.py | + tensorrt_llm/serve/scripts/__init__.py | + tensorrt_llm/serve/scripts/backend_request_func.py | + tensorrt_llm/serve/scripts/benchmark_dataset.py | + tensorrt_llm/serve/scripts/benchmark_serving.py | + tensorrt_llm/serve/scripts/benchmark_utils.py | + tensorrt_llm/serve/scripts/time_breakdown/__init__.py | + tensorrt_llm/serve/scripts/time_breakdown/__main__.py | + tensorrt_llm/serve/scripts/time_breakdown/time_breakdown.py | + tensorrt_llm/serve/tool_parser/base_tool_parser.py | + tensorrt_llm/serve/tool_parser/qwen3_tool_parser.py | + tensorrt_llm/serve/tool_parser/utils.py | + tensorrt_llm/tools/__init__.py | + tensorrt_llm/tools/importlib_utils.py | + tensorrt_llm/tools/layer_wise_benchmarks/deepseekv3_runner.py | + tensorrt_llm/tools/multimodal_builder.py | + tensorrt_llm/tools/onnx_utils.py | + tensorrt_llm/tools/plugin_gen/__init__.py | + tensorrt_llm/tools/plugin_gen/core.py | + tensorrt_llm/tools/plugin_gen/plugin_gen.py | + tensorrt_llm/tools/plugin_gen/shape_infer.py | + tensorrt_llm/tools/plugin_gen/templates/functional.py | + tensorrt_llm/tools/ppl.py | + tensorrt_llm/tools/profiler/nsys_profile_tools/gputrc2graph.py | + tensorrt_llm/version.py | + tests/integration/defs/__init__.py | + tests/integration/defs/accuracy/__init__.py | + 
tests/integration/defs/accuracy/accuracy_core.py | + tests/integration/defs/accuracy/scripts/collect_evaluated_accuracies.py | + tests/integration/defs/accuracy/scripts/compute_theta_and_thresholds.py | + tests/integration/defs/accuracy/test_cli_flow.py | + tests/integration/defs/accuracy/test_disaggregated_serving.py | + tests/integration/defs/accuracy/test_llm_api_autodeploy.py | + tests/integration/defs/accuracy/test_llm_api_pytorch_ray.py | + tests/integration/defs/accuracy/test_llm_api_pytorch.py | + tests/integration/defs/accuracy/test_llm_api.py | + tests/integration/defs/ci_profiler.py | + tests/integration/defs/common.py | + tests/integration/defs/conftest.py | + tests/integration/defs/cpp/conftest.py | + tests/integration/defs/cpp/cpp_common.py | + tests/integration/defs/cpp/test_e2e.py | + tests/integration/defs/cpp/test_multi_gpu.py | + tests/integration/defs/cpp/test_unit_tests.py | + tests/integration/defs/deterministic/mixtral_deterministic.py | + tests/integration/defs/deterministic/test_mixtral_deterministic.py | + tests/integration/defs/disaggregated/test_auto_scaling.py | + tests/integration/defs/disaggregated/test_disaggregated_etcd.py | + tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py | + tests/integration/defs/disaggregated/test_disaggregated.py | + tests/integration/defs/disaggregated/test_workers.py | + tests/integration/defs/examples/run_llm_fp8_quant_llama_70b.py | + tests/integration/defs/examples/run_llm_quickstart_atexit.py | + tests/integration/defs/examples/serve/test_serve_negative.py | + tests/integration/defs/examples/serve/test_serve.py | + tests/integration/defs/examples/test_ad_guided_decoding.py | + tests/integration/defs/examples/test_bert.py | + tests/integration/defs/examples/test_bindings.py | + tests/integration/defs/examples/test_chatglm.py | + tests/integration/defs/examples/test_commandr.py | + tests/integration/defs/examples/test_draft_target_model.py | + 
tests/integration/defs/examples/test_eagle.py | + tests/integration/defs/examples/test_enc_dec.py | + tests/integration/defs/examples/test_exaone.py | + tests/integration/defs/examples/test_gemma.py | + tests/integration/defs/examples/test_gpt.py | + tests/integration/defs/examples/test_gptj.py | + tests/integration/defs/examples/test_granite.py | + tests/integration/defs/examples/test_internlm.py | + tests/integration/defs/examples/test_llama.py | + tests/integration/defs/examples/test_llm_api_with_mpi.py | + tests/integration/defs/examples/test_mamba.py | + tests/integration/defs/examples/test_medusa.py | + tests/integration/defs/examples/test_mistral.py | + tests/integration/defs/examples/test_mixtral.py | + tests/integration/defs/examples/test_multimodal.py | + tests/integration/defs/examples/test_nemotron_nas.py | + tests/integration/defs/examples/test_nemotron.py | + tests/integration/defs/examples/test_ngram.py | + tests/integration/defs/examples/test_openai.py | + tests/integration/defs/examples/test_phi.py | + tests/integration/defs/examples/test_qwen.py | + tests/integration/defs/examples/test_qwen2audio.py | + tests/integration/defs/examples/test_qwenvl.py | + tests/integration/defs/examples/test_ray.py | + tests/integration/defs/examples/test_recurrentgemma.py | + tests/integration/defs/examples/test_redrafter.py | + tests/integration/defs/examples/test_whisper.py | + tests/integration/defs/llmapi/__init__.py | + tests/integration/defs/llmapi/_run_llmapi_llm.py | + tests/integration/defs/llmapi/test_llm_api_connector.py | + tests/integration/defs/llmapi/test_llm_api_qa.py | + tests/integration/defs/llmapi/test_llm_e2e.py | + tests/integration/defs/llmapi/test_llm_examples.py | + tests/integration/defs/local_venv.py | + tests/integration/defs/perf/__init__.py | + tests/integration/defs/perf/allowed_configs.py | + tests/integration/defs/perf/build.py | + tests/integration/defs/perf/create_perf_comparison_report.py | + 
tests/integration/defs/perf/data_export.py | + tests/integration/defs/perf/data.py | + tests/integration/defs/perf/diff_tools.py | + tests/integration/defs/perf/gpu_clock_lock.py | + tests/integration/defs/perf/misc.py | + tests/integration/defs/perf/pytorch_model_config.py | + tests/integration/defs/perf/sample_options_config.py | + tests/integration/defs/perf/sampler_options_config.py | + tests/integration/defs/perf/sanity_perf_check.py | + tests/integration/defs/perf/session_data_writer.py | + tests/integration/defs/perf/test_perf.py | + tests/integration/defs/perf/utils.py | + tests/integration/defs/runner_interface.py | + tests/integration/defs/stress_test/stress_test.py | + tests/integration/defs/sysinfo/get_sysinfo.py | + tests/integration/defs/test_e2e.py | + tests/integration/defs/test_fmha.py | + tests/integration/defs/test_list_parser.py | + tests/integration/defs/test_list_validation.py | + tests/integration/defs/test_mlpf_results.py | + tests/integration/defs/test_sanity.py | + tests/integration/defs/test_unittests.py | + tests/integration/defs/triton_server/__init__.py | + tests/integration/defs/triton_server/build_engines.py | + tests/integration/defs/triton_server/common.py | + tests/integration/defs/triton_server/conftest.py | + tests/integration/defs/triton_server/local_venv.py | + tests/integration/defs/triton_server/rcca/bug_4323566/inflight_batcher_llm_client_with_end_id.py | + tests/integration/defs/triton_server/runner_interface.py | + tests/integration/defs/triton_server/test_list_parser.py | + tests/integration/defs/triton_server/test_triton_llm.py | + tests/integration/defs/triton_server/test_triton_memleak.py | + tests/integration/defs/triton_server/test_triton_multi_node.py | + tests/integration/defs/triton_server/test_triton_rcca.py | + tests/integration/defs/triton_server/test_triton.py | + tests/integration/defs/triton_server/trt_test_alternative.py | + tests/integration/defs/trt_test_alternative.py | + 
tests/integration/defs/utils/__init__.py | + tests/integration/defs/utils/periodic_junit.py | + tests/integration/defs/utils/timeout_manager.py | + tests/microbenchmarks/all_reduce.py | + tests/microbenchmarks/build_time_benchmark.py | + tests/microbenchmarks/build_time_dashboard.py | + tests/scripts/allreduce_perf/allreduce_heuristic_code_gen.py | + tests/scripts/allreduce_perf/allreduce_perf_viz.py | + tests/scripts/iteration_log_parser.py | + tests/scripts/perf-sanity/parse_benchmark_results.py | + tests/scripts/perf-sanity/run_benchmark_serve.py | + tests/unittest/_torch/attention/sparse/test_dsa_indexer.py | + tests/unittest/_torch/attention/sparse/test_flash_mla.py | + tests/unittest/_torch/attention/sparse/test_rocketkv.py | + tests/unittest/_torch/attention/sparse/test_sparse_mla_forward.py | + tests/unittest/_torch/attention/test_attention_mla.py | + tests/unittest/_torch/attention/test_attention_no_cache.py | + tests/unittest/_torch/attention/test_attention.py | + tests/unittest/_torch/attention/test_flashinfer_attention.py | + tests/unittest/_torch/attention/test_flashinfer_star_attn.py | + tests/unittest/_torch/attention/test_vanilla_attention.py | + tests/unittest/_torch/compilation/test_add_norm.py | + tests/unittest/_torch/debugger/test_debugger_addon.py | + tests/unittest/_torch/executor/test_chunked_logits.py | + tests/unittest/_torch/executor/test_executor_request_queue.py | + tests/unittest/_torch/executor/test_overlap_scheduler.py | + tests/unittest/_torch/executor/test_pytorch_model_engine.py | + tests/unittest/_torch/executor/test_resource_manager.py | + tests/unittest/_torch/executor/test_router_dealer_ipc.py | + tests/unittest/_torch/helpers.py | + tests/unittest/_torch/misc/test_autotuner.py | + tests/unittest/_torch/misc/test_share_tensor.py | + tests/unittest/_torch/misc/test_virtual_memory.py | + tests/unittest/_torch/modeling/test_modeling_bert.py | + tests/unittest/_torch/modeling/test_modeling_clip.py | + 
tests/unittest/_torch/modeling/test_modeling_exaone4.py | + tests/unittest/_torch/modeling/test_modeling_gemma3.py | + tests/unittest/_torch/modeling/test_modeling_gpt_oss.py | + tests/unittest/_torch/modeling/test_modeling_llama_min_latency.py | + tests/unittest/_torch/modeling/test_modeling_llama.py | + tests/unittest/_torch/modeling/test_modeling_mixtral.py | + tests/unittest/_torch/modeling/test_modeling_mllama.py | + tests/unittest/_torch/modeling/test_modeling_nemotron_h.py | + tests/unittest/_torch/modeling/test_modeling_nemotron_nas.py | + tests/unittest/_torch/modeling/test_modeling_nemotron.py | + tests/unittest/_torch/modeling/test_modeling_out_of_tree.py | + tests/unittest/_torch/modeling/test_modeling_phi3.py | + tests/unittest/_torch/modeling/test_modeling_qwen_moe.py | + tests/unittest/_torch/modeling/test_modeling_qwen.py | + tests/unittest/_torch/modeling/test_modeling_qwen2_5vl.py | + tests/unittest/_torch/modeling/test_modeling_siglip.py | + tests/unittest/_torch/modeling/test_modeling_vila.py | + tests/unittest/_torch/modules/test_fused_moe.py | + tests/unittest/_torch/modules/test_group_rmn_norm.py | + tests/unittest/_torch/modules/test_moe_host_sharer.py | + tests/unittest/_torch/modules/test_moe_load_balancer.py | + tests/unittest/_torch/modules/test_moe_routing.py | + tests/unittest/_torch/modules/test_rotary_embedding.py | + tests/unittest/_torch/modules/test_triton_linear.py | + tests/unittest/_torch/modules/tests_lora_modules/test_lora_attention_pytorch_flow_vs_trt.py | + tests/unittest/_torch/modules/tests_lora_modules/test_lora_plugin_vs_lora_op.py | + tests/unittest/_torch/multi_gpu_modeling/test_deepseek.py | + tests/unittest/_torch/multi_gpu_modeling/test_llama3.py | + tests/unittest/_torch/multi_gpu/test_allreduce.py | + tests/unittest/_torch/multi_gpu/test_alltoall.py | + tests/unittest/_torch/multi_gpu/test_ar_residual_norm.py | + tests/unittest/_torch/multi_gpu/test_embedding.py | + tests/unittest/_torch/multi_gpu/test_linear.py 
| + tests/unittest/_torch/multi_gpu/test_lowprecision_allreduce.py | + tests/unittest/_torch/multi_gpu/test_mnnvl_allreduce.py | + tests/unittest/_torch/multi_gpu/test_mnnvl_memory.py | + tests/unittest/_torch/multi_gpu/test_moe_a2a.py | + tests/unittest/_torch/multi_gpu/test_star_attention.py | + tests/unittest/_torch/multi_gpu/test_user_buffers.py | + tests/unittest/_torch/multimodal/test_external_embedding.py | + tests/unittest/_torch/multimodal/test_find_num_image_tokens.py | + tests/unittest/_torch/multimodal/test_fuse_input_embeds.py | + tests/unittest/_torch/multimodal/test_mm_encoder_standalone.py | + tests/unittest/_torch/multimodal/test_multimodal_runtime.py | + tests/unittest/_torch/multimodal/test_share_multiparams.py | + tests/unittest/_torch/pattern_watcher.py | + tests/unittest/_torch/ray_orchestrator/conftest.py | + tests/unittest/_torch/ray_orchestrator/multi_gpu/test_executor.py | + tests/unittest/_torch/ray_orchestrator/multi_gpu/test_mapping.py | + tests/unittest/_torch/ray_orchestrator/multi_gpu/test_ops_ray.py | + tests/unittest/_torch/ray_orchestrator/multi_gpu/test_ops.py | + tests/unittest/_torch/ray_orchestrator/multi_gpu/test_placement.py | + tests/unittest/_torch/ray_orchestrator/single_gpu/test_cache_transceiver_comm.py | + tests/unittest/_torch/sampler/test_beam_search.py | + tests/unittest/_torch/sampler/test_best_of_n.py | + tests/unittest/_torch/sampler/test_return_logits.py | + tests/unittest/_torch/sampler/test_torch_multi_arange.py | + tests/unittest/_torch/sampler/test_trtllm_sampler.py | + tests/unittest/_torch/speculative/test_draft_target.py | + tests/unittest/_torch/speculative/test_draft_token_tree_sampling.py | + tests/unittest/_torch/speculative/test_draft_token_tree_verification.py | + tests/unittest/_torch/speculative/test_dynamic_spec_decode.py | + tests/unittest/_torch/speculative/test_eagle3.py | + tests/unittest/_torch/speculative/test_kv_cache_reuse.py | + tests/unittest/_torch/speculative/test_mtp.py | + 
tests/unittest/_torch/speculative/test_ngram.py | + tests/unittest/_torch/speculative/test_save_state.py | + tests/unittest/_torch/speculative/test_spec_gate.py | + tests/unittest/_torch/speculative/test_torch_rejection_sampling.py | + tests/unittest/_torch/speculative/test_user_provided.py | + tests/unittest/_torch/test_connector.py | + tests/unittest/_torch/thop/parallel/deep_gemm_tests.py | + tests/unittest/_torch/thop/parallel/test_causal_conv1d_op.py | + tests/unittest/_torch/thop/parallel/test_cublas_mm.py | + tests/unittest/_torch/thop/parallel/test_custom_ops.py | + tests/unittest/_torch/thop/parallel/test_dsv3_fused_a_gemm.py | + tests/unittest/_torch/thop/parallel/test_dsv3_router_gemm.py | + tests/unittest/_torch/thop/parallel/test_finegrained_mixed_dtype_gemm.py | + tests/unittest/_torch/thop/parallel/test_fp4_bmm_quantize.py | + tests/unittest/_torch/thop/parallel/test_fp4_calculate_global_scale.py | + tests/unittest/_torch/thop/parallel/test_fp4_gemm_quantize.py | + tests/unittest/_torch/thop/parallel/test_fp4_linear.py | + tests/unittest/_torch/thop/parallel/test_fp4_swizzle.py | + tests/unittest/_torch/thop/parallel/test_fp8_block_scale_gemm.py | + tests/unittest/_torch/thop/parallel/test_fp8_linear.py | + tests/unittest/_torch/thop/parallel/test_fp8_per_tensor_scale_tllmg_gemm.py | + tests/unittest/_torch/thop/parallel/test_fp8_quantize.py | + tests/unittest/_torch/thop/parallel/test_fp8_rowwise_linear.py | + tests/unittest/_torch/thop/parallel/test_fused_qk_norm_rope.py | + tests/unittest/_torch/thop/parallel/test_logits_bitmask_op.py | + tests/unittest/_torch/thop/parallel/test_mamba_conv1d_op.py | + tests/unittest/_torch/thop/parallel/test_mamba2_chunk_ss_update.py | + tests/unittest/_torch/thop/parallel/test_moe.py | + tests/unittest/_torch/thop/parallel/test_noaux_tc.py | + tests/unittest/_torch/thop/parallel/test_scaled_mm.py | + tests/unittest/_torch/thop/parallel/test_selective_scan_op.py | + 
tests/unittest/_torch/thop/parallel/test_tinygemm2.py | + tests/unittest/_torch/thop/parallel/test_tllmg_bmm.py | + tests/unittest/_torch/thop/parallel/test_w4a16_linear.py | + tests/unittest/_torch/thop/parallel/test_w4a8_linear.py | + tests/unittest/_torch/thop/parallel/test_w4a8_mxfp4_mxfp8_gemm.py | + tests/unittest/_torch/thop/parallel/test_weight_only_quant_gemm.py | + tests/unittest/_torch/thop/parallel/test_weight_only_quant_linear.py | + tests/unittest/_torch/thop/serial/test_moe_alltoall.py | + tests/unittest/api_stability/api_stability_core.py | + tests/unittest/api_stability/test_llm_api.py | + tests/unittest/bindings/binding_test_utils.py | + tests/unittest/bindings/test_bindings_moe.py | + tests/unittest/bindings/test_bindings_ut.py | + tests/unittest/bindings/test_executor_bindings.py | + tests/unittest/bindings/test_hostfunc.py | + tests/unittest/conftest.py | + tests/unittest/disaggregated/test_cluster_storage.py | + tests/unittest/disaggregated/test_disagg_cluster_manager_worker.py | + tests/unittest/disaggregated/test_disagg_utils.py | + tests/unittest/disaggregated/test_remoteDictionary.py | + tests/unittest/disaggregated/test_router.py | + tests/unittest/dump_checkpoint_stats.py | + tests/unittest/executor/test_base_worker.py | + tests/unittest/executor/test_rpc_proxy.py | + tests/unittest/executor/test_rpc_worker.py | + tests/unittest/executor/test_rpc.py | + tests/unittest/gc_utils.py | + tests/unittest/llmapi/__init__.py | + tests/unittest/llmapi/_run_mpi_comm_task.py | + tests/unittest/llmapi/_run_multi_llm_tasks.py | + tests/unittest/llmapi/_run_multi_mpi_comm_tasks.py | + tests/unittest/llmapi/apps/__init__.py | + tests/unittest/llmapi/apps/_test_disagg_serving_multi_nodes.py | + tests/unittest/llmapi/apps/_test_llm_chat.py | + tests/unittest/llmapi/apps/_test_llm_server.py | + tests/unittest/llmapi/apps/_test_openai_cache_salt.py | + tests/unittest/llmapi/apps/_test_openai_chat_guided_decoding.py | + 
tests/unittest/llmapi/apps/_test_openai_chat_harmony.py | + tests/unittest/llmapi/apps/_test_openai_chat_multimodal.py | + tests/unittest/llmapi/apps/_test_openai_chat.py | + tests/unittest/llmapi/apps/_test_openai_completions.py | + tests/unittest/llmapi/apps/_test_openai_consistent_chat.py | + tests/unittest/llmapi/apps/_test_openai_lora.py | + tests/unittest/llmapi/apps/_test_openai_metrics.py | + tests/unittest/llmapi/apps/_test_openai_misc.py | + tests/unittest/llmapi/apps/_test_openai_mmencoder.py | + tests/unittest/llmapi/apps/_test_openai_multi_chat.py | + tests/unittest/llmapi/apps/_test_openai_multi_gpu.py | + tests/unittest/llmapi/apps/_test_openai_multi_nodes.py | + tests/unittest/llmapi/apps/_test_openai_perf_metrics.py | + tests/unittest/llmapi/apps/_test_openai_prometheus.py | + tests/unittest/llmapi/apps/_test_openai_reasoning.py | + tests/unittest/llmapi/apps/_test_openai_responses.py | + tests/unittest/llmapi/apps/_test_openai_tool_call.py | + tests/unittest/llmapi/apps/_test_trtllm_serve_benchmark.py | + tests/unittest/llmapi/apps/_test_trtllm_serve_duplicated_args.py | + tests/unittest/llmapi/apps/_test_trtllm_serve_example.py | + tests/unittest/llmapi/apps/_test_trtllm_serve_lora.py | + tests/unittest/llmapi/apps/_test_trtllm_serve_multimodal_benchmark.py | + tests/unittest/llmapi/apps/_test_trtllm_serve_multimodal_example.py | + tests/unittest/llmapi/apps/_test_trtllm_serve_top_logprobs.py | + tests/unittest/llmapi/apps/openai_server.py | + tests/unittest/llmapi/apps/test_tool_parsers.py | + tests/unittest/llmapi/apps/utils.py | + tests/unittest/llmapi/lora_test_utils.py | + tests/unittest/llmapi/run_llm_exit.py | + tests/unittest/llmapi/run_llm_with_postproc.py | + tests/unittest/llmapi/run_llm.py | + tests/unittest/llmapi/test_additional_model_outputs.py | + tests/unittest/llmapi/test_build_cache.py | + tests/unittest/llmapi/test_executor.py | + tests/unittest/llmapi/test_gc_utils.py | + tests/unittest/llmapi/test_llm_args.py | + 
tests/unittest/llmapi/test_llm_download.py | + tests/unittest/llmapi/test_llm_kv_cache_events.py | + tests/unittest/llmapi/test_llm_models.py | + tests/unittest/llmapi/test_llm_multi_gpu_pytorch.py | + tests/unittest/llmapi/test_llm_multi_gpu.py | + tests/unittest/llmapi/test_llm_pytorch.py | + tests/unittest/llmapi/test_llm_quant.py | + tests/unittest/llmapi/test_llm_utils.py | + tests/unittest/llmapi/test_llm.py | + tests/unittest/llmapi/test_memory_profiling.py | + tests/unittest/llmapi/test_mpi_session.py | + tests/unittest/llmapi/test_reasoning_parser.py | + tests/unittest/llmapi/test_serialization.py | + tests/unittest/llmapi/test_utils.py | + tests/unittest/others/__init__.py | + tests/unittest/others/test_builder.py | + tests/unittest/others/test_convert_spec_decoding_mask_to_packed_mask.py | + tests/unittest/others/test_debugging_api.py | + tests/unittest/others/test_exception.py | + tests/unittest/others/test_export.py | + tests/unittest/others/test_graph_rewriter.py | + tests/unittest/others/test_kv_cache_manager.py | + tests/unittest/others/test_kv_cache_transceiver.py | + tests/unittest/others/test_kv_cache_update.py | + tests/unittest/others/test_layer.py | + tests/unittest/others/test_leak.py | + tests/unittest/others/test_mapping.py | + tests/unittest/others/test_model_dtype.py | + tests/unittest/others/test_module.py | + tests/unittest/others/test_multimodal_registry.py | + tests/unittest/others/test_plugins.py | + tests/unittest/others/test_precision_control.py | + tests/unittest/others/test_pretrained_config.py | + tests/unittest/others/test_session.py | + tests/unittest/others/test_time_breakdown.py | + tests/unittest/profile_utils.py | + tests/unittest/scaffolding/__init__.py | + tests/unittest/scaffolding/test_bench.py | + tests/unittest/scaffolding/test_parallel_process.py | + tests/unittest/scaffolding/test_scaffolding.py | + tests/unittest/scaffolding/test_task_collection.py | + tests/unittest/scaffolding/test_worker.py | + 
tests/unittest/test_model_runner_cpp.py | + tests/unittest/test_pip_install.py | + tests/unittest/tools/__init__.py | + tests/unittest/tools/plugin_gen/__init__.py | + tests/unittest/tools/plugin_gen/kernel_config.py | + tests/unittest/tools/plugin_gen/test_core.py | + tests/unittest/tools/plugin_gen/test_plugin_gen.py | + tests/unittest/tools/plugin_gen/test_shape_infer.py | + tests/unittest/tools/test_layer_wise_benchmarks.py | + tests/unittest/tools/test_prepare_dataset.py | + tests/unittest/tools/test_test_to_stage_mapping.py | + tests/unittest/trt/__init__.py | + tests/unittest/trt/attention/test_bert_attention.py | + tests/unittest/trt/attention/test_gpt_attention_IFB.py | + tests/unittest/trt/attention/test_gpt_attention_no_cache.py | + tests/unittest/trt/attention/test_gpt_attention.py | + tests/unittest/trt/attention/test_sage_attention.py | + tests/unittest/trt/functional/__init__.py | + tests/unittest/trt/functional/test_alibi.py | + tests/unittest/trt/functional/test_allreduce_norm.py | + tests/unittest/trt/functional/test_allreduce_prepost_residual_norm.py | + tests/unittest/trt/functional/test_arange.py | + tests/unittest/trt/functional/test_argmax.py | + tests/unittest/trt/functional/test_assertion.py | + tests/unittest/trt/functional/test_avg_pool2d.py | + tests/unittest/trt/functional/test_cast.py | + tests/unittest/trt/functional/test_conv2d.py | + tests/unittest/trt/functional/test_conv3d.py | + tests/unittest/trt/functional/test_cos.py | + tests/unittest/trt/functional/test_cumsum.py | + tests/unittest/trt/functional/test_dora.py | + tests/unittest/trt/functional/test_einsum.py | + tests/unittest/trt/functional/test_embedding_single_gpu.py | + tests/unittest/trt/functional/test_exp.py | + tests/unittest/trt/functional/test_expand.py | + tests/unittest/trt/functional/test_flatten.py | + tests/unittest/trt/functional/test_flip.py | + tests/unittest/trt/functional/test_fp4_gemm_ootb.py | + tests/unittest/trt/functional/test_fp4_gemm.py | + 
tests/unittest/trt/functional/test_gather_nd.py | + tests/unittest/trt/functional/test_gather.py | + tests/unittest/trt/functional/test_geglu.py | + tests/unittest/trt/functional/test_gelu.py | + tests/unittest/trt/functional/test_gemm_swiglu.py | + tests/unittest/trt/functional/test_group_norm.py | + tests/unittest/trt/functional/test_identity.py | + tests/unittest/trt/functional/test_index_select.py | + tests/unittest/trt/functional/test_interpolate.py | + tests/unittest/trt/functional/test_logsoftmax.py | + tests/unittest/trt/functional/test_lora.py | + tests/unittest/trt/functional/test_low_latency_gemm.py | + tests/unittest/trt/functional/test_mamba_conv1d.py | + tests/unittest/trt/functional/test_masked_scatter.py | + tests/unittest/trt/functional/test_masked_select.py | + tests/unittest/trt/functional/test_matmul.py | + tests/unittest/trt/functional/test_meshgrid2d.py | + tests/unittest/trt/functional/test_moe.py | + tests/unittest/trt/functional/test_nccl.py | + tests/unittest/trt/functional/test_nonzero.py | + tests/unittest/trt/functional/test_outer.py | + tests/unittest/trt/functional/test_pad.py | + tests/unittest/trt/functional/test_permute.py | + tests/unittest/trt/functional/test_pp_reduce_scatter.py | + tests/unittest/trt/functional/test_quant.py | + tests/unittest/trt/functional/test_rearrange.py | + tests/unittest/trt/functional/test_repeat_interleave.py | + tests/unittest/trt/functional/test_repeat.py | + tests/unittest/trt/functional/test_rg_lru.py | + tests/unittest/trt/functional/test_sample.py | + tests/unittest/trt/functional/test_scatter_nd.py | + tests/unittest/trt/functional/test_scatter.py | + tests/unittest/trt/functional/test_select.py | + tests/unittest/trt/functional/test_selective_scan.py | + tests/unittest/trt/functional/test_sigmoid.py | + tests/unittest/trt/functional/test_silu.py | + tests/unittest/trt/functional/test_sin.py | + tests/unittest/trt/functional/test_slice.py | + tests/unittest/trt/functional/test_softplus.py | + 
tests/unittest/trt/functional/test_split.py | + tests/unittest/trt/functional/test_squeeze.py | + tests/unittest/trt/functional/test_swiglu.py | + tests/unittest/trt/functional/test_topk.py | + tests/unittest/trt/functional/test_transpose.py | + tests/unittest/trt/functional/test_unbind.py | + tests/unittest/trt/functional/test_unsqueeze.py | + tests/unittest/trt/functional/test_view.py | + tests/unittest/trt/functional/test_where.py | + tests/unittest/trt/model_api/profile_utils.py | + tests/unittest/trt/model_api/test_model_api_multi_gpu.py | + tests/unittest/trt/model_api/test_model_level_api.py | + tests/unittest/trt/model_api/test_model_quantization.py | + tests/unittest/trt/model/__init__.py | + tests/unittest/trt/model/eagle/test_decode_draft_tokens_plugin.py | + tests/unittest/trt/model/eagle/test_prepare_drafter_inputs_plugin.py | + tests/unittest/trt/model/eagle/test_sample_accept_draft_tokens_plugin.py | + tests/unittest/trt/model/redrafter/test_beams2tree.py | + tests/unittest/trt/model/redrafter/test_draft_token_indices.py | + tests/unittest/trt/model/redrafter/test_draft_token.py | + tests/unittest/trt/model/redrafter/test_gather_beams.py | + tests/unittest/trt/model/redrafter/test_mask.py | + tests/unittest/trt/model/redrafter/test_packed_position_ids.py | + tests/unittest/trt/model/redrafter/test_prefix_match_indices.py | + tests/unittest/trt/model/redrafter/test_prepare_input.py | + tests/unittest/trt/model/redrafter/test_process_logits.py | + tests/unittest/trt/model/redrafter/test_top1.py | + tests/unittest/trt/model/redrafter/test_unpack_gen_data.py | + tests/unittest/trt/model/redrafter/test_validate.py | + tests/unittest/trt/model/test_gpt_e2e.py | + tests/unittest/trt/model/test_gpt.py | + tests/unittest/trt/model/test_llama.py | + tests/unittest/trt/model/test_mamba.py | + tests/unittest/trt/model/test_mistral.py | + tests/unittest/trt/model/test_nemotron_nas.py | + tests/unittest/trt/model/test_phi.py | + 
tests/unittest/trt/model/test_unet.py | + tests/unittest/trt/python_plugin/plugin_wrapper_utils.py | + tests/unittest/trt/python_plugin/test_plugin_wrapper.py | + tests/unittest/trt/quantization/__init__.py | + tests/unittest/trt/quantization/_utils.py | + tests/unittest/trt/quantization/test_fp8_quantization.py | + tests/unittest/trt/quantization/test_fp8_rowwise_gemm.py | + tests/unittest/trt/quantization/test_functional.py | + tests/unittest/trt/quantization/test_mode.py | + tests/unittest/trt/quantization/test_moe_weight_only_quant_matmul.py | + tests/unittest/trt/quantization/test_qserve_gemm.py | + tests/unittest/trt/quantization/test_quant_layer.py | + tests/unittest/trt/quantization/test_quant.py | + tests/unittest/trt/quantization/test_smooth_quant_gemm.py | + tests/unittest/trt/quantization/test_smooth_quant_layer_norm.py | + tests/unittest/trt/quantization/test_smooth_quant_rms_norm.py | + tests/unittest/trt/quantization/test_weight_only_groupwise_quant_matmul.py | + tests/unittest/trt/quantization/test_weight_only_quant_matmul.py | + tests/unittest/utils/__init__.py | + tests/unittest/utils/cpp_paths.py | + tests/unittest/utils/llm_data.py | + tests/unittest/utils/runtime_defaults.py | + tests/unittest/utils/test_medusa_utils.py | + tests/unittest/utils/test_prebuilt_whl_cpp_extensions.py | + tests/unittest/utils/test_util.py | + tests/unittest/utils/torch_ref.py | + tests/unittest/utils/util.py | + triton_backend/all_models/disaggregated_serving/disaggregated_serving_bls/1/model.py | + triton_backend/all_models/gpt/postprocessing/1/model.py | + triton_backend/all_models/gpt/preprocessing/1/model.py | + triton_backend/all_models/gpt/tensorrt_llm/1/model.py | + triton_backend/all_models/inflight_batcher_llm/postprocessing/1/model.py | + triton_backend/all_models/inflight_batcher_llm/preprocessing/1/model.py | + triton_backend/all_models/inflight_batcher_llm/tensorrt_llm_bls/1/lib/decode.py | + 
triton_backend/all_models/inflight_batcher_llm/tensorrt_llm_bls/1/lib/triton_decoder.py | + triton_backend/all_models/inflight_batcher_llm/tensorrt_llm_bls/1/model.py | + triton_backend/all_models/inflight_batcher_llm/tensorrt_llm/1/model.py | + triton_backend/all_models/llmapi/tensorrt_llm/1/helpers.py | + triton_backend/all_models/llmapi/tensorrt_llm/1/model.py | + triton_backend/all_models/multimodal/multimodal_encoders/1/model.py | + triton_backend/all_models/multimodal/multimodal_encoders/1/multimodal_utils.py | + triton_backend/all_models/tests/test_decode.py | + triton_backend/all_models/tests/test_llmapi_python_backend.py | + triton_backend/all_models/tests/test_multi_image_preprocess.py | + triton_backend/all_models/tests/test_multimodal_encoders.py | + triton_backend/all_models/tests/test_python_backend.py | + triton_backend/all_models/tests/test_triton_decoder.py | + triton_backend/all_models/whisper/whisper_bls/1/fbank.py | + triton_backend/all_models/whisper/whisper_bls/1/model.py | + triton_backend/all_models/whisper/whisper_bls/1/tokenizer.py | + triton_backend/ci/L0_backend_trtllm/base_metrics_verification_tests.py | + triton_backend/ci/L0_backend_trtllm/custom_metrics_verification_tests.py | + triton_backend/inflight_batcher_llm/client/__init__.py | + triton_backend/inflight_batcher_llm/client/e2e_grpc_speculative_decoding_client.py | + triton_backend/inflight_batcher_llm/client/end_to_end_grpc_client.py | + triton_backend/inflight_batcher_llm/client/inflight_batcher_llm_client.py | + triton_backend/scripts/launch_triton_server.py | + triton_backend/tools/__init__.py | + triton_backend/tools/fill_template.py | + triton_backend/tools/gpt/benchmark_core_model.py | + triton_backend/tools/gpt/client_async.py | + triton_backend/tools/gpt/client.py | + triton_backend/tools/gpt/end_to_end_test.py | + triton_backend/tools/gpt/gen_input_data.py | + triton_backend/tools/inflight_batcher_llm/benchmark_core_model.py | + 
triton_backend/tools/inflight_batcher_llm/end_to_end_test.py | + triton_backend/tools/inflight_batcher_llm/speculative_decoding_test.py | + triton_backend/tools/inflight_batcher_llm/test_max_queue_size.py | + triton_backend/tools/llmapi_client.py | + triton_backend/tools/multimodal/client.py | + triton_backend/tools/tests/__init__.py | + triton_backend/tools/tests/test_fill_template.py | + triton_backend/tools/tests/test_llmapi_cancel.py | + triton_backend/tools/utils/__init__.py | + triton_backend/tools/utils/utils.py | + triton_backend/tools/whisper/client.py | + )$ + default_install_hook_types: [pre-commit, commit-msg] repos: - repo: https://github.com/pycqa/isort rev: 5.12.0 hooks: - id: isort + files: *common_files - repo: https://github.com/Lucas-C/pre-commit-hooks.git rev: v1.5.5 hooks: @@ -12,7 +1386,7 @@ repos: rev: v0.43.0 hooks: - id: yapf - exclude: ".*/auto_deploy/.*" + files: *common_files - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.1.0 hooks: @@ -48,6 +1422,7 @@ repos: additional_dependencies: - tomli args: ['--config', 'pyproject.toml'] + files: *common_files - repo: https://github.com/pre-commit/mirrors-clang-format rev: v16.0.0 hooks: @@ -72,9 +1447,7 @@ repos: hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] - pass_filenames: false - id: ruff-format - pass_filenames: false - repo: https://github.com/executablebooks/mdformat rev: 0.7.17 hooks: diff --git a/examples/models/core/qwen2audio/run_chat.py b/examples/models/core/qwen2audio/run_chat.py index d36ca786521..00d58cdc862 100644 --- a/examples/models/core/qwen2audio/run_chat.py +++ b/examples/models/core/qwen2audio/run_chat.py @@ -13,10 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+# isort: off import torch from run import QWenInfer, parse_arguments import tensorrt_llm +# isort: on if __name__ == '__main__': args = parse_arguments() diff --git a/examples/models/core/qwenvl/run_chat.py b/examples/models/core/qwenvl/run_chat.py index e3457b8a541..1f1ba6fb6fa 100644 --- a/examples/models/core/qwenvl/run_chat.py +++ b/examples/models/core/qwenvl/run_chat.py @@ -14,8 +14,10 @@ # limitations under the License. import re +# isort: off import torch from run import QWenInfer, parse_arguments, vit_process +# isort: on def make_display(port=8006): diff --git a/pyproject.toml b/pyproject.toml index 71d71e4d602..b8e82b048c2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,69 +10,11 @@ build-backend = "setuptools.build_meta" #################################################################################################### [tool.isort] line_length = 80 -# This should match the `include` in `[tool.ruff]`. See the comments in that section for why this -# is necessary. -extend_skip_glob = [ - "**/auto_deploy/**", - "tensorrt_llm/_common.py", - "tensorrt_llm/_dlpack_utils.py", - "tensorrt_llm/_ipc_utils.py", - "tensorrt_llm/_mnnvl_utils.py", - "tensorrt_llm/_torch/models/modeling_pixtral.py", - "tensorrt_llm/disaggregated_params.py", - "tensorrt_llm/engine.py", - "tensorrt_llm/graph_rewriting.py", - "tensorrt_llm/logger.py", - "tensorrt_llm/lora_manager.py", - "tensorrt_llm/module.py", - "tensorrt_llm/moe_config.py", - "tensorrt_llm/profiler.py", - "tensorrt_llm/prompt_adapter_manager.py", - "tensorrt_llm/python_plugin.py", - "tensorrt_llm/sampling_params.py", - "tensorrt_llm/top_model_mixin.py", - "tests/unittest/_torch/modeling/test_modeling_mistral.py", - "tests/unittest/_torch/modeling/test_modeling_pixtral.py", - "tests/unittest/_torch/models/checkpoints/hf/test_weight_loader.py", - "tests/unittest/_torch/sampler/test_torch_sampler.py", - "tensorrt_llm/_torch/pyexecutor/sampler.py", - "tensorrt_llm/_torch/pyexecutor/sampling_utils.py", -] 
[tool.yapf] based_on_style = "pep8" column_limit = 80 -[tool.yapfignore] -# This should match the `include` in `[tool.ruff]`. See the comments in that section for why this -# is necessary. -ignore_patterns = [ - "**/auto_deploy/**", - "tensorrt_llm/_common.py", - "tensorrt_llm/_dlpack_utils.py", - "tensorrt_llm/_ipc_utils.py", - "tensorrt_llm/_mnnvl_utils.py", - "tensorrt_llm/_torch/models/modeling_pixtral.py", - "tensorrt_llm/disaggregated_params.py", - "tensorrt_llm/engine.py", - "tensorrt_llm/graph_rewriting.py", - "tensorrt_llm/logger.py", - "tensorrt_llm/lora_manager.py", - "tensorrt_llm/module.py", - "tensorrt_llm/moe_config.py", - "tensorrt_llm/profiler.py", - "tensorrt_llm/prompt_adapter_manager.py", - "tensorrt_llm/python_plugin.py", - "tensorrt_llm/sampling_params.py", - "tensorrt_llm/top_model_mixin.py", - "tests/unittest/_torch/modeling/test_modeling_mistral.py", - "tests/unittest/_torch/modeling/test_modeling_pixtral.py", - "tests/unittest/_torch/models/checkpoints/hf/test_weight_loader.py", - "tests/unittest/_torch/sampler/test_torch_sampler.py", - "tensorrt_llm/_torch/pyexecutor/sampler.py", - "tensorrt_llm/_torch/pyexecutor/sampling_utils.py", -] - [tool.codespell] skip = ".git,3rdparty,tests/integration/test_input_files**,**.jsonl,**.json" exclude-file = "examples/models/core/whisper/tokenizer.py" @@ -82,33 +24,6 @@ ignore-words-list = "rouge,inout,atleast,strat,nd,subtile,thrid,improbe,NotIn,te in-place = true remove_all_unused_imports = true remove_unused_variables = true -# This should match the `include` in `[tool.ruff]`. See the comments in that section for why this -# is necessary. 
-exclude = [ - "**/auto_deploy/**", - "tensorrt_llm/_common.py", - "tensorrt_llm/_dlpack_utils.py", - "tensorrt_llm/_ipc_utils.py", - "tensorrt_llm/_mnnvl_utils.py", - "tensorrt_llm/_torch/models/modeling_pixtral.py", - "tensorrt_llm/disaggregated_params.py", - "tensorrt_llm/engine.py", - "tensorrt_llm/graph_rewriting.py", - "tensorrt_llm/logger.py", - "tensorrt_llm/lora_manager.py", - "tensorrt_llm/module.py", - "tensorrt_llm/moe_config.py", - "tensorrt_llm/profiler.py", - "tensorrt_llm/prompt_adapter_manager.py", - "tensorrt_llm/python_plugin.py", - "tensorrt_llm/sampling_params.py", - "tensorrt_llm/top_model_mixin.py", - "tests/unittest/_torch/modeling/test_modeling_mistral.py", - "tests/unittest/_torch/modeling/test_modeling_pixtral.py", - "tests/unittest/_torch/models/checkpoints/hf/test_weight_loader.py", - "tensorrt_llm/_torch/pyexecutor/sampler.py", - "tensorrt_llm/_torch/pyexecutor/sampling_utils.py", -] #################################################################################################### @@ -117,47 +32,1383 @@ exclude = [ [tool.ruff] line-length = 100 # Line length limit for code fix = true -include = [ - # all pyproject.toml files - "**/pyproject.toml", - # standard include of ruff restricted to auto_deploy folders - "**/auto_deploy/**/*.py", - "**/auto_deploy/**/*.pyi", - "**/auto_deploy/**/*.ipynb", - # Progressively enable ruff on all the repo to keep individual changes reasonably-sized, and - # keep merge conflicts manageable. - # Since keeping both `yapf` and `ruff` makes no sense (given that their formatting philosophies - # are quite different), we should move towards removing one in favor of the other. ruff's - # formatting mirrors black's, and both are much more widely adopted than yapf. ruff is also - # orders of magnitude faster, so we should move to deprecate `yapf`. 
- # In the transition period, we should keep the `ignore_patterns` in `[tool.yapfignore]` in sync - # with the below, so that both pre-commit hooks can complete successfully. - "tensorrt_llm/_common.py", - "tensorrt_llm/_dlpack_utils.py", - "tensorrt_llm/_ipc_utils.py", - "tensorrt_llm/_mnnvl_utils.py", - "tensorrt_llm/_torch/models/modeling_pixtral.py", - "tensorrt_llm/disaggregated_params.py", - "tensorrt_llm/engine.py", - "tensorrt_llm/graph_rewriting.py", - "tensorrt_llm/logger.py", - "tensorrt_llm/lora_manager.py", - "tensorrt_llm/module.py", - "tensorrt_llm/moe_config.py", - "tensorrt_llm/profiler.py", - "tensorrt_llm/prompt_adapter_manager.py", - "tensorrt_llm/python_plugin.py", - "tensorrt_llm/sampling_params.py", - "tensorrt_llm/top_model_mixin.py", - "tests/unittest/_torch/modeling/test_modeling_mistral.py", - "tests/unittest/_torch/modeling/test_modeling_pixtral.py", - "tests/unittest/_torch/models/checkpoints/hf/test_weight_loader.py", - "tests/unittest/_torch/sampler/test_torch_sampler.py", - "tensorrt_llm/_torch/pyexecutor/sampler.py", - "tensorrt_llm/_torch/pyexecutor/sampling_utils.py", -] +# Exclude 3rdparty and files to be formatted by isort, yapf, and autoflake. +# Keep this list in sync with .pre-commit-config.yaml. +# Enable ruff across the repo progressively (by removing entries from this list), to keep +# individual changes reasonably sized and merge conflicts manageable. +# Since keeping both `yapf` and `ruff` makes no sense (given that their formatting philosophies +# are quite different), we should move towards removing one in favor of the other. ruff's +# formatting mirrors black's, and both are much more widely adopted than yapf. ruff is also +# orders of magnitude faster, so we should move to deprecate `yapf`. 
exclude = [ "**3rdparty/**", + ".devcontainer/make_env.py", + ".github/scripts/label_community_user.py", + ".github/scripts/pr_checklist_check.py", + "benchmarks/cpp/__init__.py", + "benchmarks/cpp/prepare_dataset.py", + "benchmarks/cpp/utils/__init__.py", + "benchmarks/cpp/utils/convert_nemo_dataset.py", + "benchmarks/cpp/utils/generate_rand_loras.py", + "benchmarks/cpp/utils/prepare_real_data.py", + "benchmarks/cpp/utils/prepare_synthetic_data.py", + "benchmarks/cpp/utils/utils.py", + "cpp/conanfile.py", + "cpp/kernels/fmha_v2/conftest.py", + "cpp/kernels/fmha_v2/fmha_test.py", + "cpp/kernels/fmha_v2/setup.py", + "cpp/kernels/fmha_v2/test/conftest.py", + "cpp/kernels/fmha_v2/test/fmha/filter_rules.py", + "cpp/kernels/fmha_v2/test/fmha/test_fmha_exe.py", + "cpp/kernels/fmha_v2/test/fmha/test_fmhca_exe.py", + "cpp/kernels/fmha_v2/test/fmha/test_meta.py", + "cpp/kernels/fmha_v2/test/fmha/utils.py", + "cpp/kernels/fmha_v2/test/train_ops/test_train_ops.py", + "cpp/kernels/fmha_v2/train_ops/fmha_bmark.py", + "cpp/kernels/fmha_v2/train_ops/fmha_unit_test.py", + "cpp/kernels/fmha_v2/train_ops/my_utils.py", + "cpp/kernels/fmha_v2/train_ops/te_mha.py", + "cpp/kernels/fmha_v2/train_ops/train_setup.py", + "cpp/kernels/xqa/gen_cpp_header.py", + "cpp/kernels/xqa/gen_cubins.py", + "cpp/kernels/xqa/ref.py", + "cpp/libnuma_conan.py", + "cpp/micro_benchmarks/gen-moe-benchmark-file.py", + "cpp/tensorrt_llm/deep_ep/strip_nvshmem_helper.py", + "cpp/tensorrt_llm/kernels/cutlass_kernels/python/generate_kernels.py", + "cpp/tensorrt_llm/kernels/decoderMaskedMultiheadAttention/copy_cu.py", + "cpp/tests/resources/scripts/build_chatglm_engines.py", + "cpp/tests/resources/scripts/build_eagle_engines.py", + "cpp/tests/resources/scripts/build_enc_dec_engines.py", + "cpp/tests/resources/scripts/build_engines_utils.py", + "cpp/tests/resources/scripts/build_gpt_engines.py", + "cpp/tests/resources/scripts/build_gptj_engines.py", + "cpp/tests/resources/scripts/build_llama_engines.py", + 
"cpp/tests/resources/scripts/build_mamba_engines.py", + "cpp/tests/resources/scripts/build_medusa_engines.py", + "cpp/tests/resources/scripts/build_recurrentgemma_engines.py", + "cpp/tests/resources/scripts/build_redrafter_engines.py", + "cpp/tests/resources/scripts/generate_expected_chatglm_output.py", + "cpp/tests/resources/scripts/generate_expected_eagle_output.py", + "cpp/tests/resources/scripts/generate_expected_enc_dec_output.py", + "cpp/tests/resources/scripts/generate_expected_gpt_output.py", + "cpp/tests/resources/scripts/generate_expected_gptj_output.py", + "cpp/tests/resources/scripts/generate_expected_llama_output.py", + "cpp/tests/resources/scripts/generate_expected_mamba_output.py", + "cpp/tests/resources/scripts/generate_expected_medusa_output.py", + "cpp/tests/resources/scripts/generate_expected_recurrentgemma_output.py", + "cpp/tests/resources/scripts/generate_expected_redrafter_output.py", + "cpp/tests/resources/scripts/generate_hf_gpt_output.py", + "cpp/tests/resources/scripts/generate_test_lora_weights.py", + "cpp/tests/resources/scripts/io_converter.py", + "docs/source/conf.py", + "docs/source/helper.py", + "examples/apps/chat.py", + "examples/apps/fastapi_server.py", + "examples/bindings/executor/example_advanced.py", + "examples/bindings/executor/example_basic.py", + "examples/bindings/executor/example_debug.py", + "examples/bindings/executor/example_logits_processor.py", + "examples/disaggregated/clients/disagg_client.py", + "examples/disaggregated/slurm/benchmark/gen_server_config.py", + "examples/disaggregated/slurm/benchmark/gen_worker_config.py", + "examples/disaggregated/slurm/benchmark/submit.py", + "examples/dora/normalize_weights.py", + "examples/eagle/convert_checkpoint.py", + "examples/eval_long_context.py", + "examples/generate_checkpoint_config.py", + "examples/generate_xgrammar_tokenizer_info.py", + "examples/hf_lora_convert.py", + "examples/infinitebench/args.py", + "examples/infinitebench/compute_scores.py", + 
"examples/infinitebench/construct_synthetic_dataset.py", + "examples/infinitebench/eval_utils.py", + "examples/layer_wise_benchmarks/run_single.py", + "examples/llm-api/_tensorrt_engine/llm_eagle_decoding.py", + "examples/llm-api/_tensorrt_engine/llm_eagle2_decoding.py", + "examples/llm-api/_tensorrt_engine/llm_inference_customize.py", + "examples/llm-api/_tensorrt_engine/llm_inference_kv_events.py", + "examples/llm-api/_tensorrt_engine/llm_lookahead_decoding.py", + "examples/llm-api/_tensorrt_engine/llm_medusa_decoding.py", + "examples/llm-api/_tensorrt_engine/llm_quantization.py", + "examples/llm-api/_tensorrt_engine/quickstart_example.py", + "examples/llm-api/llm_guided_decoding.py", + "examples/llm-api/llm_inference_async_streaming.py", + "examples/llm-api/llm_inference_async.py", + "examples/llm-api/llm_inference_distributed.py", + "examples/llm-api/llm_inference.py", + "examples/llm-api/llm_kv_cache_connector.py", + "examples/llm-api/llm_kv_cache_offloading.py", + "examples/llm-api/llm_logits_processor.py", + "examples/llm-api/llm_multilora.py", + "examples/llm-api/llm_runtime.py", + "examples/llm-api/llm_sampling.py", + "examples/llm-api/llm_sparse_attention.py", + "examples/llm-api/llm_speculative_decoding.py", + "examples/llm-api/out_of_tree_example/main.py", + "examples/llm-api/out_of_tree_example/modeling_opt.py", + "examples/llm-api/quickstart_advanced.py", + "examples/llm-api/quickstart_example.py", + "examples/llm-api/quickstart_multimodal.py", + "examples/llm-api/star_attention.py", + "examples/llm-eval/lm-eval-harness/lm_eval_tensorrt_llm.py", + "examples/longbench/eval_longbench_v1.py", + "examples/longbench/eval_longbench_v2.py", + "examples/medusa/convert_checkpoint.py", + "examples/mmlu.py", + "examples/models/contrib/baichuan/convert_checkpoint.py", + "examples/models/contrib/bloom/convert_checkpoint.py", + "examples/models/contrib/chatglm-6b/tokenization_chatglm.py", + "examples/models/contrib/chatglm2-6b/tokenization_chatglm.py", + 
"examples/models/contrib/chatglm3-6b-32k/tokenization_chatglm.py", + "examples/models/contrib/cogvlm/convert_checkpoint.py", + "examples/models/contrib/dbrx/convert_checkpoint.py", + "examples/models/contrib/deepseek_v1/__init__.py", + "examples/models/contrib/deepseek_v1/convert_checkpoint.py", + "examples/models/contrib/deepseek_v2/convert_checkpoint.py", + "examples/models/contrib/dit/convert_checkpoint.py", + "examples/models/contrib/dit/diffusion.py", + "examples/models/contrib/dit/sample.py", + "examples/models/contrib/dit/utils_modelopt.py", + "examples/models/contrib/dit/vae_decoder_trt.py", + "examples/models/contrib/falcon/convert_checkpoint.py", + "examples/models/contrib/gptj/convert_checkpoint.py", + "examples/models/contrib/gptneox/convert_checkpoint.py", + "examples/models/contrib/grok/convert_checkpoint.py", + "examples/models/contrib/mmdit/convert_checkpoint.py", + "examples/models/contrib/mmdit/sample.py", + "examples/models/contrib/mpt/convert_checkpoint.py", + "examples/models/contrib/opt/convert_checkpoint.py", + "examples/models/contrib/sdxl/build_sdxl_unet.py", + "examples/models/contrib/sdxl/pipeline_stable_diffusion_xl.py", + "examples/models/contrib/sdxl/run_sdxl.py", + "examples/models/contrib/stdit/aspect.py", + "examples/models/contrib/stdit/convert_checkpoint.py", + "examples/models/contrib/stdit/pipeline_tllm.py", + "examples/models/contrib/stdit/sample.py", + "examples/models/contrib/stdit/scheduler.py", + "examples/models/contrib/stdit/text_encoder.py", + "examples/models/contrib/stdit/utils.py", + "examples/models/contrib/stdit/vae.py", + "examples/models/contrib/stdit/video_transforms.py", + "examples/models/core/bert/__init__.py", + "examples/models/core/bert/convert_checkpoint.py", + "examples/models/core/bert/run.py", + "examples/models/core/bert/utils.py", + "examples/models/core/commandr/convert_checkpoint.py", + "examples/models/core/enc_dec/__init__.py", + "examples/models/core/enc_dec/convert_checkpoint.py", + 
"examples/models/core/enc_dec/helper.py", + "examples/models/core/enc_dec/run.py", + "examples/models/core/gemma/convert_checkpoint.py", + "examples/models/core/glm-4-9b/convert_checkpoint.py", + "examples/models/core/glm-4-9b/tokenization_chatglm.py", + "examples/models/core/gpt_oss/openai_chat_client_function_calling.py", + "examples/models/core/gpt/convert_checkpoint.py", + "examples/models/core/gpt/merge_ptuning_tables.py", + "examples/models/core/gpt/nemo_lora_convert.py", + "examples/models/core/gpt/nemo_prompt_convert.py", + "examples/models/core/gpt/run_hf.py", + "examples/models/core/internlm2/convert_checkpoint.py", + "examples/models/core/kimi_k2/kimi_k2_tool_calling_example.py", + "examples/models/core/llama/convert_checkpoint.py", + "examples/models/core/llama/summarize_long.py", + "examples/models/core/mamba/convert_checkpoint.py", + "examples/models/core/mllama/convert_checkpoint.py", + "examples/models/core/multimodal/__init__.py", + "examples/models/core/multimodal/build_multimodal_engine.py", + "examples/models/core/multimodal/eval.py", + "examples/models/core/multimodal/run.py", + "examples/models/core/multimodal/utils.py", + "examples/models/core/nemotron_nas/calibration_utils.py", + "examples/models/core/nemotron_nas/convert_checkpoint.py", + "examples/models/core/phi/convert_checkpoint.py", + "examples/models/core/qwen/convert_checkpoint.py", + "examples/models/core/qwen2audio/run_chat.py", + "examples/models/core/qwen2audio/run.py", + "examples/models/core/qwen2audio/utils.py", + "examples/models/core/qwenvl/run_chat.py", + "examples/models/core/qwenvl/run.py", + "examples/models/core/qwenvl/show_pic.py", + "examples/models/core/qwenvl/vit_onnx_trt.py", + "examples/models/core/recurrentgemma/convert_checkpoint.py", + "examples/models/core/vit/convert_checkpoint.py", + "examples/models/core/whisper/convert_checkpoint.py", + "examples/models/core/whisper/distil_whisper/convert_from_distil_whisper.py", + "examples/models/core/whisper/run.py", + 
"examples/models/core/whisper/tokenizer.py", + "examples/models/core/whisper/whisper_utils.py", + "examples/ngram/run_dtm_ngram.py", + "examples/openai_triton/manual_plugin/build.py", + "examples/openai_triton/manual_plugin/fmha_triton.py", + "examples/openai_triton/manual_plugin/plugin.py", + "examples/openai_triton/manual_plugin/run.py", + "examples/openai_triton/plugin_autogen/build_engine.py", + "examples/openai_triton/plugin_autogen/kernel_config.py", + "examples/openai_triton/plugin_autogen/run_engine.py", + "examples/python_plugin/build_lookup.py", + "examples/python_plugin/plugin_lib/__init__.py", + "examples/python_plugin/plugin_lib/lookup_kernel.py", + "examples/python_plugin/plugin_lib/lookup_plugin.py", + "examples/python_plugin/run_lookup.py", + "examples/quantization/quantize_mixed_precision_moe.py", + "examples/quantization/quantize.py", + "examples/ray_orchestrator/llm_inference_async_ray.py", + "examples/ray_orchestrator/llm_inference_distributed_ray.py", + "examples/redrafter/convert_checkpoint.py", + "examples/run.py", + "examples/scaffolding/contrib/AsyncGeneration/stream_generation_controller.py", + "examples/scaffolding/contrib/AsyncGeneration/stream_generation_run.py", + "examples/scaffolding/contrib/DeepConf/run_generation.py", + "examples/scaffolding/contrib/Dynasor/scaffolding_dynasor_run.py", + "examples/scaffolding/contrib/mcp/e2b/e2bserver.py", + "examples/scaffolding/contrib/mcp/e2b/main.py", + "examples/scaffolding/contrib/mcp/mcptest.py", + "examples/scaffolding/contrib/mcp/weather/weather.py", + "examples/scaffolding/contrib/mcp/websearch/main.py", + "examples/scaffolding/contrib/mcp/websearch/websearch.py", + "examples/scaffolding/contrib/TreeInference/run_mcts_example.py", + "examples/scaffolding/contrib/TreeInference/run_tot_example.py", + "examples/scaffolding/run_basic_generation.py", + "examples/scaffolding/run_best_of_n_with_reward.py", + "examples/scaffolding/run_majority_vote_aime24.py", + 
"examples/scaffolding/token_budget_majority_vote.py", + "examples/serve/openai_chat_client_for_multimodal.py", + "examples/serve/openai_chat_client.py", + "examples/serve/openai_completion_client_for_lora.py", + "examples/serve/openai_completion_client_json_schema.py", + "examples/serve/openai_completion_client.py", + "examples/summarize.py", + "examples/utils.py", + "examples/wide_ep/ep_load_balancer/generate_eplb_config.py", + "examples/wide_ep/ep_load_balancer/report_load_statistics.py", + "examples/wide_ep/ep_load_balancer/utils.py", + "examples/wide_ep/slurm_scripts/process_gen_iterlog.py", + "jenkins/scripts/mergeWaiveList.py", + "jenkins/scripts/open_search_db.py", + "jenkins/scripts/test_rerun.py", + "scripts/build_cpp_examples.py", + "scripts/build_wheel.py", + "scripts/check_test_list.py", + "scripts/dco_check.py", + "scripts/format_test_list.py", + "scripts/generate_duration.py", + "scripts/generate_lock_file.py", + "scripts/get_wheel_from_package.py", + "scripts/git_replace.py", + "scripts/package_trt_llm.py", + "scripts/release_check.py", + "scripts/rename_docker_images.py", + "scripts/test_to_stage_mapping.py", + "setup.py", + "tensorrt_llm/__init__.py", + "tensorrt_llm/_ray_utils.py", + "tensorrt_llm/_tensorrt_engine/__init__.py", + "tensorrt_llm/_torch/__init__.py", + "tensorrt_llm/_torch/attention_backend/__init__.py", + "tensorrt_llm/_torch/attention_backend/flashinfer.py", + "tensorrt_llm/_torch/attention_backend/interface.py", + "tensorrt_llm/_torch/attention_backend/sparse/__init__.py", + "tensorrt_llm/_torch/attention_backend/sparse/dsa.py", + "tensorrt_llm/_torch/attention_backend/sparse/kernel.py", + "tensorrt_llm/_torch/attention_backend/sparse/rocket.py", + "tensorrt_llm/_torch/attention_backend/sparse/utils.py", + "tensorrt_llm/_torch/attention_backend/star_flashinfer.py", + "tensorrt_llm/_torch/attention_backend/trtllm.py", + "tensorrt_llm/_torch/attention_backend/utils.py", + "tensorrt_llm/_torch/attention_backend/vanilla.py", + 
"tensorrt_llm/_torch/autotuner.py", + "tensorrt_llm/_torch/compilation/__init__.py", + "tensorrt_llm/_torch/compilation/backend.py", + "tensorrt_llm/_torch/compilation/multi_stream/__init__.py", + "tensorrt_llm/_torch/compilation/multi_stream/auto_multi_stream.py", + "tensorrt_llm/_torch/compilation/patterns/__init__.py", + "tensorrt_llm/_torch/compilation/patterns/ar_residual_norm.py", + "tensorrt_llm/_torch/compilation/patterns/residual_add_norm.py", + "tensorrt_llm/_torch/compilation/piecewise_optimizer.py", + "tensorrt_llm/_torch/compilation/recover_pass.py", + "tensorrt_llm/_torch/compilation/remove_copy_pass.py", + "tensorrt_llm/_torch/compilation/utils.py", + "tensorrt_llm/_torch/configs/deepseek_v3.py", + "tensorrt_llm/_torch/cublaslt_utils.py", + "tensorrt_llm/_torch/custom_ops/__init__.py", + "tensorrt_llm/_torch/custom_ops/cpp_custom_ops.py", + "tensorrt_llm/_torch/custom_ops/cute_dsl_custom_ops.py", + "tensorrt_llm/_torch/custom_ops/flashinfer_custom_ops.py", + "tensorrt_llm/_torch/custom_ops/torch_custom_ops.py", + "tensorrt_llm/_torch/custom_ops/trtllm_gen_custom_ops.py", + "tensorrt_llm/_torch/custom_ops/userbuffers_custom_ops.py", + "tensorrt_llm/_torch/cute_dsl_kernels/__init__.py", + "tensorrt_llm/_torch/cute_dsl_kernels/blackwell/__init__.py", + "tensorrt_llm/_torch/cute_dsl_kernels/blackwell/custom_pipeline.py", + "tensorrt_llm/_torch/cute_dsl_kernels/blackwell/dense_blockscaled_gemm_persistent.py", + "tensorrt_llm/_torch/cute_dsl_kernels/blackwell/utils.py", + "tensorrt_llm/_torch/cute_dsl_utils.py", + "tensorrt_llm/_torch/debug/__init__.py", + "tensorrt_llm/_torch/debug/debug_hook.py", + "tensorrt_llm/_torch/device_mesh.py", + "tensorrt_llm/_torch/distributed/__init__.py", + "tensorrt_llm/_torch/distributed/communicator.py", + "tensorrt_llm/_torch/distributed/moe_alltoall.py", + "tensorrt_llm/_torch/distributed/ops.py", + "tensorrt_llm/_torch/distributed/pg_utils.py", + "tensorrt_llm/_torch/expert_statistic.py", + 
"tensorrt_llm/_torch/flashinfer_utils.py", + "tensorrt_llm/_torch/hostfunc.py", + "tensorrt_llm/_torch/llm.py", + "tensorrt_llm/_torch/memory_buffer_utils.py", + "tensorrt_llm/_torch/metadata.py", + "tensorrt_llm/_torch/model_config.py", + "tensorrt_llm/_torch/models/__init__.py", + "tensorrt_llm/_torch/models/checkpoints/__init__.py", + "tensorrt_llm/_torch/models/checkpoints/auto_mapper.py", + "tensorrt_llm/_torch/models/checkpoints/base_checkpoint_loader.py", + "tensorrt_llm/_torch/models/checkpoints/base_config_loader.py", + "tensorrt_llm/_torch/models/checkpoints/base_weight_loader.py", + "tensorrt_llm/_torch/models/checkpoints/base_weight_mapper.py", + "tensorrt_llm/_torch/models/checkpoints/hf/__init__.py", + "tensorrt_llm/_torch/models/checkpoints/hf/checkpoint_loader.py", + "tensorrt_llm/_torch/models/checkpoints/hf/config_loader.py", + "tensorrt_llm/_torch/models/checkpoints/hf/gemma3_weight_mapper.py", + "tensorrt_llm/_torch/models/checkpoints/hf/llama4_weight_mapper.py", + "tensorrt_llm/_torch/models/checkpoints/hf/mixtral_weight_mapper.py", + "tensorrt_llm/_torch/models/checkpoints/hf/nemotron_h_weight_mapper.py", + "tensorrt_llm/_torch/models/checkpoints/hf/qwen2_moe_weight_mapper.py", + "tensorrt_llm/_torch/models/checkpoints/hf/qwen2vl_weight_mapper.py", + "tensorrt_llm/_torch/models/checkpoints/hf/qwen3_moe_weight_mapper.py", + "tensorrt_llm/_torch/models/checkpoints/hf/qwen3_next_weight_mapper.py", + "tensorrt_llm/_torch/models/checkpoints/hf/weight_loader.py", + "tensorrt_llm/_torch/models/checkpoints/hf/weight_mapper.py", + "tensorrt_llm/_torch/models/modeling_auto.py", + "tensorrt_llm/_torch/models/modeling_bert.py", + "tensorrt_llm/_torch/models/modeling_clip.py", + "tensorrt_llm/_torch/models/modeling_deepseekv3.py", + "tensorrt_llm/_torch/models/modeling_exaone4.py", + "tensorrt_llm/_torch/models/modeling_gemma3.py", + "tensorrt_llm/_torch/models/modeling_gemma3vl.py", + "tensorrt_llm/_torch/models/modeling_gpt_oss.py", + 
"tensorrt_llm/_torch/models/modeling_hunyuan_dense.py", + "tensorrt_llm/_torch/models/modeling_hunyuan_moe.py", + "tensorrt_llm/_torch/models/modeling_hyperclovax.py", + "tensorrt_llm/_torch/models/modeling_llama_min_latency.py", + "tensorrt_llm/_torch/models/modeling_llama.py", + "tensorrt_llm/_torch/models/modeling_llava_next.py", + "tensorrt_llm/_torch/models/modeling_mistral.py", + "tensorrt_llm/_torch/models/modeling_mixtral.py", + "tensorrt_llm/_torch/models/modeling_mllama.py", + "tensorrt_llm/_torch/models/modeling_multimodal_encoder.py", + "tensorrt_llm/_torch/models/modeling_multimodal_utils.py", + "tensorrt_llm/_torch/models/modeling_nanov2vlm.py", + "tensorrt_llm/_torch/models/modeling_nemotron_h.py", + "tensorrt_llm/_torch/models/modeling_nemotron_nas.py", + "tensorrt_llm/_torch/models/modeling_nemotron.py", + "tensorrt_llm/_torch/models/modeling_phi3.py", + "tensorrt_llm/_torch/models/modeling_phi4mm.py", + "tensorrt_llm/_torch/models/modeling_qwen_moe.py", + "tensorrt_llm/_torch/models/modeling_qwen.py", + "tensorrt_llm/_torch/models/modeling_qwen2vl.py", + "tensorrt_llm/_torch/models/modeling_qwen3_moe.py", + "tensorrt_llm/_torch/models/modeling_qwen3_next.py", + "tensorrt_llm/_torch/models/modeling_qwen3.py", + "tensorrt_llm/_torch/models/modeling_radio.py", + "tensorrt_llm/_torch/models/modeling_seedoss.py", + "tensorrt_llm/_torch/models/modeling_siglip.py", + "tensorrt_llm/_torch/models/modeling_speculative.py", + "tensorrt_llm/_torch/models/modeling_utils.py", + "tensorrt_llm/_torch/models/modeling_vila.py", + "tensorrt_llm/_torch/modules/__init__.py", + "tensorrt_llm/_torch/modules/attention.py", + "tensorrt_llm/_torch/modules/decoder_layer.py", + "tensorrt_llm/_torch/modules/embedding.py", + "tensorrt_llm/_torch/modules/fla/__init__.py", + "tensorrt_llm/_torch/modules/fla/chunk_delta_h.py", + "tensorrt_llm/_torch/modules/fla/chunk_o.py", + "tensorrt_llm/_torch/modules/fla/chunk_scaled_dot_kkt.py", + "tensorrt_llm/_torch/modules/fla/chunk.py", 
+ "tensorrt_llm/_torch/modules/fla/cumsum.py", + "tensorrt_llm/_torch/modules/fla/fused_recurrent.py", + "tensorrt_llm/_torch/modules/fla/fused_sigmoid_gating_recurrent.py", + "tensorrt_llm/_torch/modules/fla/index.py", + "tensorrt_llm/_torch/modules/fla/l2norm.py", + "tensorrt_llm/_torch/modules/fla/layernorm_gated.py", + "tensorrt_llm/_torch/modules/fla/op.py", + "tensorrt_llm/_torch/modules/fla/solve_tril.py", + "tensorrt_llm/_torch/modules/fla/utils.py", + "tensorrt_llm/_torch/modules/fla/wy_fast.py", + "tensorrt_llm/_torch/modules/fused_moe/__init__.py", + "tensorrt_llm/_torch/modules/fused_moe/create_moe.py", + "tensorrt_llm/_torch/modules/fused_moe/deep_ep_utils.py", + "tensorrt_llm/_torch/modules/fused_moe/fused_moe_cute_dsl.py", + "tensorrt_llm/_torch/modules/fused_moe/fused_moe_cutlass.py", + "tensorrt_llm/_torch/modules/fused_moe/fused_moe_deepgemm.py", + "tensorrt_llm/_torch/modules/fused_moe/fused_moe_triton.py", + "tensorrt_llm/_torch/modules/fused_moe/fused_moe_trtllm_gen.py", + "tensorrt_llm/_torch/modules/fused_moe/fused_moe_vanilla.py", + "tensorrt_llm/_torch/modules/fused_moe/fused_moe_wide_ep.py", + "tensorrt_llm/_torch/modules/fused_moe/interface.py", + "tensorrt_llm/_torch/modules/fused_moe/moe_load_balancer.py", + "tensorrt_llm/_torch/modules/fused_moe/ops/__init__.py", + "tensorrt_llm/_torch/modules/fused_moe/ops/moe_op_cutlass.py", + "tensorrt_llm/_torch/modules/fused_moe/ops/moe_op_deepgemm.py", + "tensorrt_llm/_torch/modules/fused_moe/ops/moe_op.py", + "tensorrt_llm/_torch/modules/fused_moe/quantization.py", + "tensorrt_llm/_torch/modules/fused_moe/routing.py", + "tensorrt_llm/_torch/modules/gated_mlp.py", + "tensorrt_llm/_torch/modules/layer_norm.py", + "tensorrt_llm/_torch/modules/linear.py", + "tensorrt_llm/_torch/modules/logits_processor.py", + "tensorrt_llm/_torch/modules/mamba/__init__.py", + "tensorrt_llm/_torch/modules/mamba/causal_conv1d.py", + "tensorrt_llm/_torch/modules/mamba/layernorm_gated.py", + 
"tensorrt_llm/_torch/modules/mamba/mamba2_metadata.py", + "tensorrt_llm/_torch/modules/mamba/mamba2_mixer.py", + "tensorrt_llm/_torch/modules/mamba/selective_state_update.py", + "tensorrt_llm/_torch/modules/mamba/softplus.py", + "tensorrt_llm/_torch/modules/mamba/ssd_bmm.py", + "tensorrt_llm/_torch/modules/mamba/ssd_chunk_scan.py", + "tensorrt_llm/_torch/modules/mamba/ssd_chunk_state.py", + "tensorrt_llm/_torch/modules/mamba/ssd_combined.py", + "tensorrt_llm/_torch/modules/mamba/ssd_state_passing.py", + "tensorrt_llm/_torch/modules/mlp.py", + "tensorrt_llm/_torch/modules/multi_stream_utils.py", + "tensorrt_llm/_torch/modules/qk_norm_attention.py", + "tensorrt_llm/_torch/modules/rms_norm.py", + "tensorrt_llm/_torch/modules/rotary_embedding.py", + "tensorrt_llm/_torch/modules/swiglu.py", + "tensorrt_llm/_torch/modules/triton_linear.py", + "tensorrt_llm/_torch/peft/__init__.py", + "tensorrt_llm/_torch/peft/lora/__init__.py", + "tensorrt_llm/_torch/peft/lora/layer.py", + "tensorrt_llm/_torch/pyexecutor/__init__.py", + "tensorrt_llm/_torch/pyexecutor/_util.py", + "tensorrt_llm/_torch/pyexecutor/config_utils.py", + "tensorrt_llm/_torch/pyexecutor/config.py", + "tensorrt_llm/_torch/pyexecutor/cuda_graph_runner.py", + "tensorrt_llm/_torch/pyexecutor/executor_request_queue.py", + "tensorrt_llm/_torch/pyexecutor/finish_reason.py", + "tensorrt_llm/_torch/pyexecutor/grammar_matcher.py", + "tensorrt_llm/_torch/pyexecutor/guided_decoder.py", + "tensorrt_llm/_torch/pyexecutor/handle_additional_outputs.py", + "tensorrt_llm/_torch/pyexecutor/handle_logits.py", + "tensorrt_llm/_torch/pyexecutor/kv_cache_connector.py", + "tensorrt_llm/_torch/pyexecutor/kv_cache_transceiver.py", + "tensorrt_llm/_torch/pyexecutor/layerwise_nvtx_marker.py", + "tensorrt_llm/_torch/pyexecutor/llm_request.py", + "tensorrt_llm/_torch/pyexecutor/make_decoding_batch_input_output.py", + "tensorrt_llm/_torch/pyexecutor/mamba_cache_manager.py", + "tensorrt_llm/_torch/pyexecutor/model_engine.py", + 
"tensorrt_llm/_torch/pyexecutor/model_loader.py", + "tensorrt_llm/_torch/pyexecutor/py_executor_creator.py", + "tensorrt_llm/_torch/pyexecutor/py_executor.py", + "tensorrt_llm/_torch/pyexecutor/resource_manager.py", + "tensorrt_llm/_torch/pyexecutor/scheduler.py", + "tensorrt_llm/_torch/pyexecutor/seq_slot_manager.py", + "tensorrt_llm/_torch/shared_tensor/__init__.py", + "tensorrt_llm/_torch/shared_tensor/shared_tensor.py", + "tensorrt_llm/_torch/speculative/__init__.py", + "tensorrt_llm/_torch/speculative/auto_heuristic.py", + "tensorrt_llm/_torch/speculative/drafter.py", + "tensorrt_llm/_torch/speculative/drafting_loops.py", + "tensorrt_llm/_torch/speculative/eagle3.py", + "tensorrt_llm/_torch/speculative/interface.py", + "tensorrt_llm/_torch/speculative/model_drafter.py", + "tensorrt_llm/_torch/speculative/mtp.py", + "tensorrt_llm/_torch/speculative/ngram.py", + "tensorrt_llm/_torch/speculative/save_hidden_state.py", + "tensorrt_llm/_torch/speculative/spec_tree_manager.py", + "tensorrt_llm/_torch/speculative/speculation_gate.py", + "tensorrt_llm/_torch/speculative/utils.py", + "tensorrt_llm/_torch/utils.py", + "tensorrt_llm/_torch/virtual_memory.py", + "tensorrt_llm/_utils.py", + "tensorrt_llm/bench/__init__.py", + "tensorrt_llm/bench/benchmark/__init__.py", + "tensorrt_llm/bench/benchmark/low_latency.py", + "tensorrt_llm/bench/benchmark/throughput.py", + "tensorrt_llm/bench/benchmark/utils/__init__.py", + "tensorrt_llm/bench/benchmark/utils/asynchronous.py", + "tensorrt_llm/bench/benchmark/utils/general.py", + "tensorrt_llm/bench/benchmark/utils/processes.py", + "tensorrt_llm/bench/build/__init__.py", + "tensorrt_llm/bench/build/build.py", + "tensorrt_llm/bench/build/dataclasses.py", + "tensorrt_llm/bench/build/tuning.py", + "tensorrt_llm/bench/build/utils.py", + "tensorrt_llm/bench/dataclasses/__init__.py", + "tensorrt_llm/bench/dataclasses/configuration.py", + "tensorrt_llm/bench/dataclasses/engine.py", + "tensorrt_llm/bench/dataclasses/enums.py", + 
"tensorrt_llm/bench/dataclasses/general.py", + "tensorrt_llm/bench/dataclasses/reporting.py", + "tensorrt_llm/bench/dataclasses/statistics.py", + "tensorrt_llm/bench/utils/__init__.py", + "tensorrt_llm/bench/utils/data.py", + "tensorrt_llm/builder.py", + "tensorrt_llm/commands/__init__.py", + "tensorrt_llm/commands/bench.py", + "tensorrt_llm/commands/build.py", + "tensorrt_llm/commands/eval.py", + "tensorrt_llm/commands/prune.py", + "tensorrt_llm/commands/refit.py", + "tensorrt_llm/commands/serve.py", + "tensorrt_llm/evaluate/__init__.py", + "tensorrt_llm/evaluate/cnn_dailymail.py", + "tensorrt_llm/evaluate/interface.py", + "tensorrt_llm/evaluate/json_mode_eval.py", + "tensorrt_llm/evaluate/lm_eval_tasks/gpqa/cot_zeroshot_aa/_generate_configs.py", + "tensorrt_llm/evaluate/lm_eval_tasks/gpqa/cot_zeroshot_aa/utils.py", + "tensorrt_llm/evaluate/lm_eval.py", + "tensorrt_llm/evaluate/longbench_v2.py", + "tensorrt_llm/evaluate/mmlu.py", + "tensorrt_llm/executor/__init__.py", + "tensorrt_llm/executor/base_worker.py", + "tensorrt_llm/executor/executor.py", + "tensorrt_llm/executor/ipc.py", + "tensorrt_llm/executor/postproc_worker.py", + "tensorrt_llm/executor/proxy.py", + "tensorrt_llm/executor/ray_executor.py", + "tensorrt_llm/executor/ray_gpu_worker.py", + "tensorrt_llm/executor/request.py", + "tensorrt_llm/executor/result.py", + "tensorrt_llm/executor/rpc_proxy.py", + "tensorrt_llm/executor/rpc_worker.py", + "tensorrt_llm/executor/rpc/__init__.py", + "tensorrt_llm/executor/rpc/rpc_client.py", + "tensorrt_llm/executor/rpc/rpc_common.py", + "tensorrt_llm/executor/rpc/rpc_server.py", + "tensorrt_llm/executor/utils.py", + "tensorrt_llm/executor/worker.py", + "tensorrt_llm/functional.py", + "tensorrt_llm/inputs/__init__.py", + "tensorrt_llm/inputs/data.py", + "tensorrt_llm/inputs/evs.py", + "tensorrt_llm/inputs/multimodal.py", + "tensorrt_llm/inputs/registry.py", + "tensorrt_llm/inputs/utils.py", + "tensorrt_llm/layers/__init__.py", + "tensorrt_llm/layers/activation.py", + 
"tensorrt_llm/layers/attention.py", + "tensorrt_llm/layers/cast.py", + "tensorrt_llm/layers/conv.py", + "tensorrt_llm/layers/embedding.py", + "tensorrt_llm/layers/language_adapter.py", + "tensorrt_llm/layers/linear.py", + "tensorrt_llm/layers/lora.py", + "tensorrt_llm/layers/mlp.py", + "tensorrt_llm/layers/moe.py", + "tensorrt_llm/layers/normalization.py", + "tensorrt_llm/layers/pooling.py", + "tensorrt_llm/layers/recurrent.py", + "tensorrt_llm/layers/ssm.py", + "tensorrt_llm/llmapi/__init__.py", + "tensorrt_llm/llmapi/build_cache.py", + "tensorrt_llm/llmapi/disagg_utils.py", + "tensorrt_llm/llmapi/kv_cache_type.py", + "tensorrt_llm/llmapi/llm_args.py", + "tensorrt_llm/llmapi/llm_utils.py", + "tensorrt_llm/llmapi/llm.py", + "tensorrt_llm/llmapi/mgmn_leader_node.py", + "tensorrt_llm/llmapi/mgmn_worker_node.py", + "tensorrt_llm/llmapi/mm_encoder.py", + "tensorrt_llm/llmapi/mpi_session.py", + "tensorrt_llm/llmapi/reasoning_parser.py", + "tensorrt_llm/llmapi/tokenizer.py", + "tensorrt_llm/llmapi/tracer.py", + "tensorrt_llm/llmapi/tracing.py", + "tensorrt_llm/llmapi/utils.py", + "tensorrt_llm/lora_helper.py", + "tensorrt_llm/mapping.py", + "tensorrt_llm/math_utils.py", + "tensorrt_llm/metrics/__init__.py", + "tensorrt_llm/metrics/collector.py", + "tensorrt_llm/metrics/enums.py", + "tensorrt_llm/models/__init__.py", + "tensorrt_llm/models/automodel.py", + "tensorrt_llm/models/baichuan/__init__.py", + "tensorrt_llm/models/baichuan/config.py", + "tensorrt_llm/models/baichuan/convert.py", + "tensorrt_llm/models/baichuan/model.py", + "tensorrt_llm/models/bert/__init__.py", + "tensorrt_llm/models/bert/config.py", + "tensorrt_llm/models/bert/convert.py", + "tensorrt_llm/models/bert/model.py", + "tensorrt_llm/models/bloom/__init__.py", + "tensorrt_llm/models/bloom/model.py", + "tensorrt_llm/models/chatglm/__init__.py", + "tensorrt_llm/models/chatglm/config.py", + "tensorrt_llm/models/chatglm/convert.py", + "tensorrt_llm/models/chatglm/model.py", + 
"tensorrt_llm/models/clip/__init__.py", + "tensorrt_llm/models/clip/model.py", + "tensorrt_llm/models/cogvlm/__init__.py", + "tensorrt_llm/models/cogvlm/config.py", + "tensorrt_llm/models/cogvlm/convert.py", + "tensorrt_llm/models/cogvlm/model.py", + "tensorrt_llm/models/commandr/__init__.py", + "tensorrt_llm/models/commandr/config.py", + "tensorrt_llm/models/commandr/model.py", + "tensorrt_llm/models/convert_utils.py", + "tensorrt_llm/models/dbrx/__init__.py", + "tensorrt_llm/models/dbrx/config.py", + "tensorrt_llm/models/dbrx/model.py", + "tensorrt_llm/models/deepseek_v1/__init__.py", + "tensorrt_llm/models/deepseek_v1/config.py", + "tensorrt_llm/models/deepseek_v1/convert.py", + "tensorrt_llm/models/deepseek_v1/model.py", + "tensorrt_llm/models/deepseek_v2/__init__.py", + "tensorrt_llm/models/deepseek_v2/config.py", + "tensorrt_llm/models/deepseek_v2/convert.py", + "tensorrt_llm/models/deepseek_v2/model.py", + "tensorrt_llm/models/dit/__init__.py", + "tensorrt_llm/models/dit/model.py", + "tensorrt_llm/models/eagle/__init__.py", + "tensorrt_llm/models/eagle/config.py", + "tensorrt_llm/models/eagle/model.py", + "tensorrt_llm/models/enc_dec/__init__.py", + "tensorrt_llm/models/enc_dec/model.py", + "tensorrt_llm/models/falcon/__init__.py", + "tensorrt_llm/models/falcon/config.py", + "tensorrt_llm/models/falcon/convert.py", + "tensorrt_llm/models/falcon/model.py", + "tensorrt_llm/models/gemma/__init__.py", + "tensorrt_llm/models/gemma/config.py", + "tensorrt_llm/models/gemma/convert.py", + "tensorrt_llm/models/gemma/model.py", + "tensorrt_llm/models/gemma/smoothquant.py", + "tensorrt_llm/models/gemma/utils/__init__.py", + "tensorrt_llm/models/gemma/utils/layers.py", + "tensorrt_llm/models/gemma/utils/modules.py", + "tensorrt_llm/models/gemma/utils/params.py", + "tensorrt_llm/models/gemma/utils/positional_embeddings.py", + "tensorrt_llm/models/gemma/utils/sampler.py", + "tensorrt_llm/models/gemma/utils/transformer.py", + "tensorrt_llm/models/gemma/weight.py", + 
"tensorrt_llm/models/generation_mixin.py", + "tensorrt_llm/models/gpt/__init__.py", + "tensorrt_llm/models/gpt/config.py", + "tensorrt_llm/models/gpt/convert.py", + "tensorrt_llm/models/gpt/model.py", + "tensorrt_llm/models/gptj/__init__.py", + "tensorrt_llm/models/gptj/config.py", + "tensorrt_llm/models/gptj/convert.py", + "tensorrt_llm/models/gptj/model.py", + "tensorrt_llm/models/gptneox/__init__.py", + "tensorrt_llm/models/gptneox/model.py", + "tensorrt_llm/models/grok/__init__.py", + "tensorrt_llm/models/grok/convert.py", + "tensorrt_llm/models/grok/model.py", + "tensorrt_llm/models/grok/weight.py", + "tensorrt_llm/models/llama/__init__.py", + "tensorrt_llm/models/llama/config.py", + "tensorrt_llm/models/llama/convert.py", + "tensorrt_llm/models/llama/model.py", + "tensorrt_llm/models/mamba/__init__.py", + "tensorrt_llm/models/mamba/config.py", + "tensorrt_llm/models/mamba/convert.py", + "tensorrt_llm/models/mamba/model.py", + "tensorrt_llm/models/medusa/__init__.py", + "tensorrt_llm/models/medusa/config.py", + "tensorrt_llm/models/medusa/model.py", + "tensorrt_llm/models/medusa/weight.py", + "tensorrt_llm/models/mllama/__init__.py", + "tensorrt_llm/models/mllama/config.py", + "tensorrt_llm/models/mllama/model.py", + "tensorrt_llm/models/mmdit_sd3/__init__.py", + "tensorrt_llm/models/mmdit_sd3/config.py", + "tensorrt_llm/models/mmdit_sd3/model.py", + "tensorrt_llm/models/model_weights_loader.py", + "tensorrt_llm/models/modeling_utils.py", + "tensorrt_llm/models/mpt/__init__.py", + "tensorrt_llm/models/mpt/model.py", + "tensorrt_llm/models/multimodal_encoders/__init__.py", + "tensorrt_llm/models/multimodal_encoders/config.py", + "tensorrt_llm/models/multimodal_encoders/model.py", + "tensorrt_llm/models/nemotron_nas/__init__.py", + "tensorrt_llm/models/nemotron_nas/config.py", + "tensorrt_llm/models/nemotron_nas/convert.py", + "tensorrt_llm/models/nemotron_nas/layer_config.py", + "tensorrt_llm/models/nemotron_nas/model.py", + 
"tensorrt_llm/models/opt/__init__.py", + "tensorrt_llm/models/opt/model.py", + "tensorrt_llm/models/phi/__init__.py", + "tensorrt_llm/models/phi/config.py", + "tensorrt_llm/models/phi/convert.py", + "tensorrt_llm/models/phi/model.py", + "tensorrt_llm/models/phi3/__init__.py", + "tensorrt_llm/models/phi3/config.py", + "tensorrt_llm/models/phi3/convert.py", + "tensorrt_llm/models/phi3/model.py", + "tensorrt_llm/models/phi3/split_weights.py", + "tensorrt_llm/models/qwen/__init__.py", + "tensorrt_llm/models/qwen/config.py", + "tensorrt_llm/models/qwen/convert.py", + "tensorrt_llm/models/qwen/model.py", + "tensorrt_llm/models/qwen/utils.py", + "tensorrt_llm/models/recurrentgemma/__init__.py", + "tensorrt_llm/models/recurrentgemma/model.py", + "tensorrt_llm/models/redrafter/__init__.py", + "tensorrt_llm/models/redrafter/drafter.py", + "tensorrt_llm/models/redrafter/model.py", + "tensorrt_llm/models/redrafter/redrafter_helper.py", + "tensorrt_llm/models/stdit/__init__.py", + "tensorrt_llm/models/stdit/config.py", + "tensorrt_llm/models/stdit/model.py", + "tensorrt_llm/models/unet/__init__.py", + "tensorrt_llm/models/unet/attention.py", + "tensorrt_llm/models/unet/embeddings.py", + "tensorrt_llm/models/unet/pp/__init__.py", + "tensorrt_llm/models/unet/pp/attention.py", + "tensorrt_llm/models/unet/pp/conv2d.py", + "tensorrt_llm/models/unet/pp/groupnorm.py", + "tensorrt_llm/models/unet/pp/unet_pp.py", + "tensorrt_llm/models/unet/resnet.py", + "tensorrt_llm/models/unet/unet_2d_blocks.py", + "tensorrt_llm/models/unet/unet_2d_condition.py", + "tensorrt_llm/models/unet/weights.py", + "tensorrt_llm/network.py", + "tensorrt_llm/parameter.py", + "tensorrt_llm/plugin/__init__.py", + "tensorrt_llm/plugin/plugin.py", + "tensorrt_llm/quantization/__init__.py", + "tensorrt_llm/quantization/functional.py", + "tensorrt_llm/quantization/image_processing.py", + "tensorrt_llm/quantization/layers.py", + "tensorrt_llm/quantization/mode.py", + 
"tensorrt_llm/quantization/quantize_by_modelopt.py", + "tensorrt_llm/quantization/quantize.py", + "tensorrt_llm/quantization/utils/__init__.py", + "tensorrt_llm/quantization/utils/fp4_utils.py", + "tensorrt_llm/quantization/utils/fp8_utils.py", + "tensorrt_llm/ray_stub.py", + "tensorrt_llm/runtime/__init__.py", + "tensorrt_llm/runtime/enc_dec_model_runner.py", + "tensorrt_llm/runtime/generation.py", + "tensorrt_llm/runtime/kv_cache_manager.py", + "tensorrt_llm/runtime/medusa_utils.py", + "tensorrt_llm/runtime/memory_pools/__init__.py", + "tensorrt_llm/runtime/memory_pools/memory_pools_allocator.py", + "tensorrt_llm/runtime/memory_pools/pool.py", + "tensorrt_llm/runtime/memory_pools/pools_kv_cache_manager.py", + "tensorrt_llm/runtime/model_runner_cpp.py", + "tensorrt_llm/runtime/model_runner.py", + "tensorrt_llm/runtime/multimodal_model_runner.py", + "tensorrt_llm/runtime/processor_wrapper/__init__.py", + "tensorrt_llm/runtime/processor_wrapper/mllama_processor_wrapper.py", + "tensorrt_llm/runtime/processor_wrapper/processor_wrapper.py", + "tensorrt_llm/runtime/redrafter_utils.py", + "tensorrt_llm/runtime/session.py", + "tensorrt_llm/scaffolding/__init__.py", + "tensorrt_llm/scaffolding/benchmark.py", + "tensorrt_llm/scaffolding/contrib/__init__.py", + "tensorrt_llm/scaffolding/contrib/AsyncGeneration/__init__.py", + "tensorrt_llm/scaffolding/contrib/AsyncGeneration/stream_generation.py", + "tensorrt_llm/scaffolding/contrib/DeepConf/__init__.py", + "tensorrt_llm/scaffolding/contrib/DeepConf/deep_conf_controller.py", + "tensorrt_llm/scaffolding/contrib/DeepConf/deep_conf_utils.py", + "tensorrt_llm/scaffolding/contrib/Dynasor/__init__.py", + "tensorrt_llm/scaffolding/contrib/Dynasor/dynasor_controller.py", + "tensorrt_llm/scaffolding/contrib/Dynasor/evaluator.py", + "tensorrt_llm/scaffolding/contrib/mcp/__init__.py", + "tensorrt_llm/scaffolding/contrib/mcp/chat_handler.py", + "tensorrt_llm/scaffolding/contrib/mcp/chat_task.py", + 
"tensorrt_llm/scaffolding/contrib/mcp/mcp_controller.py", + "tensorrt_llm/scaffolding/contrib/mcp/mcp_task.py", + "tensorrt_llm/scaffolding/contrib/mcp/mcp_utils.py", + "tensorrt_llm/scaffolding/contrib/mcp/mcp_worker.py", + "tensorrt_llm/scaffolding/contrib/TreeInference/__init__.py", + "tensorrt_llm/scaffolding/contrib/TreeInference/tree_controllers.py", + "tensorrt_llm/scaffolding/controller.py", + "tensorrt_llm/scaffolding/math_utils.py", + "tensorrt_llm/scaffolding/result.py", + "tensorrt_llm/scaffolding/scaffolding_llm.py", + "tensorrt_llm/scaffolding/task_collection.py", + "tensorrt_llm/scaffolding/task.py", + "tensorrt_llm/scaffolding/worker.py", + "tensorrt_llm/scheduling_params.py", + "tensorrt_llm/serialization.py", + "tensorrt_llm/serve/__init__.py", + "tensorrt_llm/serve/chat_utils.py", + "tensorrt_llm/serve/cluster_storage.py", + "tensorrt_llm/serve/disagg_auto_scaling.py", + "tensorrt_llm/serve/harmony_adapter.py", + "tensorrt_llm/serve/metadata_server.py", + "tensorrt_llm/serve/openai_disagg_server.py", + "tensorrt_llm/serve/openai_protocol.py", + "tensorrt_llm/serve/openai_server.py", + "tensorrt_llm/serve/postprocess_handlers.py", + "tensorrt_llm/serve/responses_utils.py", + "tensorrt_llm/serve/router.py", + "tensorrt_llm/serve/scripts/__init__.py", + "tensorrt_llm/serve/scripts/backend_request_func.py", + "tensorrt_llm/serve/scripts/benchmark_dataset.py", + "tensorrt_llm/serve/scripts/benchmark_serving.py", + "tensorrt_llm/serve/scripts/benchmark_utils.py", + "tensorrt_llm/serve/scripts/time_breakdown/__init__.py", + "tensorrt_llm/serve/scripts/time_breakdown/__main__.py", + "tensorrt_llm/serve/scripts/time_breakdown/time_breakdown.py", + "tensorrt_llm/serve/tool_parser/base_tool_parser.py", + "tensorrt_llm/serve/tool_parser/qwen3_tool_parser.py", + "tensorrt_llm/serve/tool_parser/utils.py", + "tensorrt_llm/tools/__init__.py", + "tensorrt_llm/tools/importlib_utils.py", + "tensorrt_llm/tools/layer_wise_benchmarks/deepseekv3_runner.py", + 
"tensorrt_llm/tools/multimodal_builder.py", + "tensorrt_llm/tools/onnx_utils.py", + "tensorrt_llm/tools/plugin_gen/__init__.py", + "tensorrt_llm/tools/plugin_gen/core.py", + "tensorrt_llm/tools/plugin_gen/plugin_gen.py", + "tensorrt_llm/tools/plugin_gen/shape_infer.py", + "tensorrt_llm/tools/plugin_gen/templates/functional.py", + "tensorrt_llm/tools/ppl.py", + "tensorrt_llm/tools/profiler/nsys_profile_tools/gputrc2graph.py", + "tensorrt_llm/version.py", + "tests/integration/defs/__init__.py", + "tests/integration/defs/accuracy/__init__.py", + "tests/integration/defs/accuracy/accuracy_core.py", + "tests/integration/defs/accuracy/scripts/collect_evaluated_accuracies.py", + "tests/integration/defs/accuracy/scripts/compute_theta_and_thresholds.py", + "tests/integration/defs/accuracy/test_cli_flow.py", + "tests/integration/defs/accuracy/test_disaggregated_serving.py", + "tests/integration/defs/accuracy/test_llm_api_autodeploy.py", + "tests/integration/defs/accuracy/test_llm_api_pytorch_ray.py", + "tests/integration/defs/accuracy/test_llm_api_pytorch.py", + "tests/integration/defs/accuracy/test_llm_api.py", + "tests/integration/defs/ci_profiler.py", + "tests/integration/defs/common.py", + "tests/integration/defs/conftest.py", + "tests/integration/defs/cpp/conftest.py", + "tests/integration/defs/cpp/cpp_common.py", + "tests/integration/defs/cpp/test_e2e.py", + "tests/integration/defs/cpp/test_multi_gpu.py", + "tests/integration/defs/cpp/test_unit_tests.py", + "tests/integration/defs/deterministic/mixtral_deterministic.py", + "tests/integration/defs/deterministic/test_mixtral_deterministic.py", + "tests/integration/defs/disaggregated/test_auto_scaling.py", + "tests/integration/defs/disaggregated/test_disaggregated_etcd.py", + "tests/integration/defs/disaggregated/test_disaggregated_single_gpu.py", + "tests/integration/defs/disaggregated/test_disaggregated.py", + "tests/integration/defs/disaggregated/test_workers.py", + 
"tests/integration/defs/examples/run_llm_fp8_quant_llama_70b.py", + "tests/integration/defs/examples/run_llm_quickstart_atexit.py", + "tests/integration/defs/examples/serve/test_serve_negative.py", + "tests/integration/defs/examples/serve/test_serve.py", + "tests/integration/defs/examples/test_ad_guided_decoding.py", + "tests/integration/defs/examples/test_bert.py", + "tests/integration/defs/examples/test_bindings.py", + "tests/integration/defs/examples/test_chatglm.py", + "tests/integration/defs/examples/test_commandr.py", + "tests/integration/defs/examples/test_draft_target_model.py", + "tests/integration/defs/examples/test_eagle.py", + "tests/integration/defs/examples/test_enc_dec.py", + "tests/integration/defs/examples/test_exaone.py", + "tests/integration/defs/examples/test_gemma.py", + "tests/integration/defs/examples/test_gpt.py", + "tests/integration/defs/examples/test_gptj.py", + "tests/integration/defs/examples/test_granite.py", + "tests/integration/defs/examples/test_internlm.py", + "tests/integration/defs/examples/test_llama.py", + "tests/integration/defs/examples/test_llm_api_with_mpi.py", + "tests/integration/defs/examples/test_mamba.py", + "tests/integration/defs/examples/test_medusa.py", + "tests/integration/defs/examples/test_mistral.py", + "tests/integration/defs/examples/test_mixtral.py", + "tests/integration/defs/examples/test_multimodal.py", + "tests/integration/defs/examples/test_nemotron_nas.py", + "tests/integration/defs/examples/test_nemotron.py", + "tests/integration/defs/examples/test_ngram.py", + "tests/integration/defs/examples/test_openai.py", + "tests/integration/defs/examples/test_phi.py", + "tests/integration/defs/examples/test_qwen.py", + "tests/integration/defs/examples/test_qwen2audio.py", + "tests/integration/defs/examples/test_qwenvl.py", + "tests/integration/defs/examples/test_ray.py", + "tests/integration/defs/examples/test_recurrentgemma.py", + "tests/integration/defs/examples/test_redrafter.py", + 
"tests/integration/defs/examples/test_whisper.py", + "tests/integration/defs/llmapi/__init__.py", + "tests/integration/defs/llmapi/_run_llmapi_llm.py", + "tests/integration/defs/llmapi/test_llm_api_connector.py", + "tests/integration/defs/llmapi/test_llm_api_qa.py", + "tests/integration/defs/llmapi/test_llm_e2e.py", + "tests/integration/defs/llmapi/test_llm_examples.py", + "tests/integration/defs/local_venv.py", + "tests/integration/defs/perf/__init__.py", + "tests/integration/defs/perf/allowed_configs.py", + "tests/integration/defs/perf/build.py", + "tests/integration/defs/perf/create_perf_comparison_report.py", + "tests/integration/defs/perf/data_export.py", + "tests/integration/defs/perf/data.py", + "tests/integration/defs/perf/diff_tools.py", + "tests/integration/defs/perf/gpu_clock_lock.py", + "tests/integration/defs/perf/misc.py", + "tests/integration/defs/perf/pytorch_model_config.py", + "tests/integration/defs/perf/sample_options_config.py", + "tests/integration/defs/perf/sampler_options_config.py", + "tests/integration/defs/perf/sanity_perf_check.py", + "tests/integration/defs/perf/session_data_writer.py", + "tests/integration/defs/perf/test_perf.py", + "tests/integration/defs/perf/utils.py", + "tests/integration/defs/runner_interface.py", + "tests/integration/defs/stress_test/stress_test.py", + "tests/integration/defs/sysinfo/get_sysinfo.py", + "tests/integration/defs/test_e2e.py", + "tests/integration/defs/test_fmha.py", + "tests/integration/defs/test_list_parser.py", + "tests/integration/defs/test_list_validation.py", + "tests/integration/defs/test_mlpf_results.py", + "tests/integration/defs/test_sanity.py", + "tests/integration/defs/test_unittests.py", + "tests/integration/defs/triton_server/__init__.py", + "tests/integration/defs/triton_server/build_engines.py", + "tests/integration/defs/triton_server/common.py", + "tests/integration/defs/triton_server/conftest.py", + "tests/integration/defs/triton_server/local_venv.py", + 
"tests/integration/defs/triton_server/rcca/bug_4323566/inflight_batcher_llm_client_with_end_id.py", + "tests/integration/defs/triton_server/runner_interface.py", + "tests/integration/defs/triton_server/test_list_parser.py", + "tests/integration/defs/triton_server/test_triton_llm.py", + "tests/integration/defs/triton_server/test_triton_memleak.py", + "tests/integration/defs/triton_server/test_triton_multi_node.py", + "tests/integration/defs/triton_server/test_triton_rcca.py", + "tests/integration/defs/triton_server/test_triton.py", + "tests/integration/defs/triton_server/trt_test_alternative.py", + "tests/integration/defs/trt_test_alternative.py", + "tests/integration/defs/utils/__init__.py", + "tests/integration/defs/utils/periodic_junit.py", + "tests/integration/defs/utils/timeout_manager.py", + "tests/microbenchmarks/all_reduce.py", + "tests/microbenchmarks/build_time_benchmark.py", + "tests/microbenchmarks/build_time_dashboard.py", + "tests/scripts/allreduce_perf/allreduce_heuristic_code_gen.py", + "tests/scripts/allreduce_perf/allreduce_perf_viz.py", + "tests/scripts/iteration_log_parser.py", + "tests/scripts/perf-sanity/parse_benchmark_results.py", + "tests/scripts/perf-sanity/run_benchmark_serve.py", + "tests/unittest/_torch/attention/sparse/test_dsa_indexer.py", + "tests/unittest/_torch/attention/sparse/test_flash_mla.py", + "tests/unittest/_torch/attention/sparse/test_rocketkv.py", + "tests/unittest/_torch/attention/sparse/test_sparse_mla_forward.py", + "tests/unittest/_torch/attention/test_attention_mla.py", + "tests/unittest/_torch/attention/test_attention_no_cache.py", + "tests/unittest/_torch/attention/test_attention.py", + "tests/unittest/_torch/attention/test_flashinfer_attention.py", + "tests/unittest/_torch/attention/test_flashinfer_star_attn.py", + "tests/unittest/_torch/attention/test_vanilla_attention.py", + "tests/unittest/_torch/compilation/test_add_norm.py", + "tests/unittest/_torch/debugger/test_debugger_addon.py", + 
"tests/unittest/_torch/executor/test_chunked_logits.py", + "tests/unittest/_torch/executor/test_executor_request_queue.py", + "tests/unittest/_torch/executor/test_overlap_scheduler.py", + "tests/unittest/_torch/executor/test_pytorch_model_engine.py", + "tests/unittest/_torch/executor/test_resource_manager.py", + "tests/unittest/_torch/executor/test_router_dealer_ipc.py", + "tests/unittest/_torch/helpers.py", + "tests/unittest/_torch/misc/test_autotuner.py", + "tests/unittest/_torch/misc/test_share_tensor.py", + "tests/unittest/_torch/misc/test_virtual_memory.py", + "tests/unittest/_torch/modeling/test_modeling_bert.py", + "tests/unittest/_torch/modeling/test_modeling_clip.py", + "tests/unittest/_torch/modeling/test_modeling_exaone4.py", + "tests/unittest/_torch/modeling/test_modeling_gemma3.py", + "tests/unittest/_torch/modeling/test_modeling_gpt_oss.py", + "tests/unittest/_torch/modeling/test_modeling_llama_min_latency.py", + "tests/unittest/_torch/modeling/test_modeling_llama.py", + "tests/unittest/_torch/modeling/test_modeling_mixtral.py", + "tests/unittest/_torch/modeling/test_modeling_mllama.py", + "tests/unittest/_torch/modeling/test_modeling_nemotron_h.py", + "tests/unittest/_torch/modeling/test_modeling_nemotron_nas.py", + "tests/unittest/_torch/modeling/test_modeling_nemotron.py", + "tests/unittest/_torch/modeling/test_modeling_out_of_tree.py", + "tests/unittest/_torch/modeling/test_modeling_phi3.py", + "tests/unittest/_torch/modeling/test_modeling_qwen_moe.py", + "tests/unittest/_torch/modeling/test_modeling_qwen.py", + "tests/unittest/_torch/modeling/test_modeling_qwen2_5vl.py", + "tests/unittest/_torch/modeling/test_modeling_siglip.py", + "tests/unittest/_torch/modeling/test_modeling_vila.py", + "tests/unittest/_torch/modules/test_fused_moe.py", + "tests/unittest/_torch/modules/test_group_rmn_norm.py", + "tests/unittest/_torch/modules/test_moe_host_sharer.py", + "tests/unittest/_torch/modules/test_moe_load_balancer.py", + 
"tests/unittest/_torch/modules/test_moe_routing.py", + "tests/unittest/_torch/modules/test_rotary_embedding.py", + "tests/unittest/_torch/modules/test_triton_linear.py", + "tests/unittest/_torch/modules/tests_lora_modules/test_lora_attention_pytorch_flow_vs_trt.py", + "tests/unittest/_torch/modules/tests_lora_modules/test_lora_plugin_vs_lora_op.py", + "tests/unittest/_torch/multi_gpu_modeling/test_deepseek.py", + "tests/unittest/_torch/multi_gpu_modeling/test_llama3.py", + "tests/unittest/_torch/multi_gpu/test_allreduce.py", + "tests/unittest/_torch/multi_gpu/test_alltoall.py", + "tests/unittest/_torch/multi_gpu/test_ar_residual_norm.py", + "tests/unittest/_torch/multi_gpu/test_embedding.py", + "tests/unittest/_torch/multi_gpu/test_linear.py", + "tests/unittest/_torch/multi_gpu/test_lowprecision_allreduce.py", + "tests/unittest/_torch/multi_gpu/test_mnnvl_allreduce.py", + "tests/unittest/_torch/multi_gpu/test_mnnvl_memory.py", + "tests/unittest/_torch/multi_gpu/test_moe_a2a.py", + "tests/unittest/_torch/multi_gpu/test_star_attention.py", + "tests/unittest/_torch/multi_gpu/test_user_buffers.py", + "tests/unittest/_torch/multimodal/test_external_embedding.py", + "tests/unittest/_torch/multimodal/test_find_num_image_tokens.py", + "tests/unittest/_torch/multimodal/test_fuse_input_embeds.py", + "tests/unittest/_torch/multimodal/test_mm_encoder_standalone.py", + "tests/unittest/_torch/multimodal/test_multimodal_runtime.py", + "tests/unittest/_torch/multimodal/test_share_multiparams.py", + "tests/unittest/_torch/pattern_watcher.py", + "tests/unittest/_torch/ray_orchestrator/conftest.py", + "tests/unittest/_torch/ray_orchestrator/multi_gpu/test_executor.py", + "tests/unittest/_torch/ray_orchestrator/multi_gpu/test_mapping.py", + "tests/unittest/_torch/ray_orchestrator/multi_gpu/test_ops_ray.py", + "tests/unittest/_torch/ray_orchestrator/multi_gpu/test_ops.py", + "tests/unittest/_torch/ray_orchestrator/multi_gpu/test_placement.py", + 
"tests/unittest/_torch/ray_orchestrator/single_gpu/test_cache_transceiver_comm.py", + "tests/unittest/_torch/sampler/test_beam_search.py", + "tests/unittest/_torch/sampler/test_best_of_n.py", + "tests/unittest/_torch/sampler/test_return_logits.py", + "tests/unittest/_torch/sampler/test_torch_multi_arange.py", + "tests/unittest/_torch/sampler/test_trtllm_sampler.py", + "tests/unittest/_torch/speculative/test_draft_target.py", + "tests/unittest/_torch/speculative/test_draft_token_tree_sampling.py", + "tests/unittest/_torch/speculative/test_draft_token_tree_verification.py", + "tests/unittest/_torch/speculative/test_dynamic_spec_decode.py", + "tests/unittest/_torch/speculative/test_eagle3.py", + "tests/unittest/_torch/speculative/test_kv_cache_reuse.py", + "tests/unittest/_torch/speculative/test_mtp.py", + "tests/unittest/_torch/speculative/test_ngram.py", + "tests/unittest/_torch/speculative/test_save_state.py", + "tests/unittest/_torch/speculative/test_spec_gate.py", + "tests/unittest/_torch/speculative/test_torch_rejection_sampling.py", + "tests/unittest/_torch/speculative/test_user_provided.py", + "tests/unittest/_torch/test_connector.py", + "tests/unittest/_torch/thop/parallel/deep_gemm_tests.py", + "tests/unittest/_torch/thop/parallel/test_causal_conv1d_op.py", + "tests/unittest/_torch/thop/parallel/test_cublas_mm.py", + "tests/unittest/_torch/thop/parallel/test_custom_ops.py", + "tests/unittest/_torch/thop/parallel/test_dsv3_fused_a_gemm.py", + "tests/unittest/_torch/thop/parallel/test_dsv3_router_gemm.py", + "tests/unittest/_torch/thop/parallel/test_finegrained_mixed_dtype_gemm.py", + "tests/unittest/_torch/thop/parallel/test_fp4_bmm_quantize.py", + "tests/unittest/_torch/thop/parallel/test_fp4_calculate_global_scale.py", + "tests/unittest/_torch/thop/parallel/test_fp4_gemm_quantize.py", + "tests/unittest/_torch/thop/parallel/test_fp4_linear.py", + "tests/unittest/_torch/thop/parallel/test_fp4_swizzle.py", + 
"tests/unittest/_torch/thop/parallel/test_fp8_block_scale_gemm.py", + "tests/unittest/_torch/thop/parallel/test_fp8_linear.py", + "tests/unittest/_torch/thop/parallel/test_fp8_per_tensor_scale_tllmg_gemm.py", + "tests/unittest/_torch/thop/parallel/test_fp8_quantize.py", + "tests/unittest/_torch/thop/parallel/test_fp8_rowwise_linear.py", + "tests/unittest/_torch/thop/parallel/test_fused_qk_norm_rope.py", + "tests/unittest/_torch/thop/parallel/test_logits_bitmask_op.py", + "tests/unittest/_torch/thop/parallel/test_mamba_conv1d_op.py", + "tests/unittest/_torch/thop/parallel/test_mamba2_chunk_ss_update.py", + "tests/unittest/_torch/thop/parallel/test_moe.py", + "tests/unittest/_torch/thop/parallel/test_noaux_tc.py", + "tests/unittest/_torch/thop/parallel/test_scaled_mm.py", + "tests/unittest/_torch/thop/parallel/test_selective_scan_op.py", + "tests/unittest/_torch/thop/parallel/test_tinygemm2.py", + "tests/unittest/_torch/thop/parallel/test_tllmg_bmm.py", + "tests/unittest/_torch/thop/parallel/test_w4a16_linear.py", + "tests/unittest/_torch/thop/parallel/test_w4a8_linear.py", + "tests/unittest/_torch/thop/parallel/test_w4a8_mxfp4_mxfp8_gemm.py", + "tests/unittest/_torch/thop/parallel/test_weight_only_quant_gemm.py", + "tests/unittest/_torch/thop/parallel/test_weight_only_quant_linear.py", + "tests/unittest/_torch/thop/serial/test_moe_alltoall.py", + "tests/unittest/api_stability/api_stability_core.py", + "tests/unittest/api_stability/test_llm_api.py", + "tests/unittest/bindings/binding_test_utils.py", + "tests/unittest/bindings/test_bindings_moe.py", + "tests/unittest/bindings/test_bindings_ut.py", + "tests/unittest/bindings/test_executor_bindings.py", + "tests/unittest/bindings/test_hostfunc.py", + "tests/unittest/conftest.py", + "tests/unittest/disaggregated/test_cluster_storage.py", + "tests/unittest/disaggregated/test_disagg_cluster_manager_worker.py", + "tests/unittest/disaggregated/test_disagg_utils.py", + "tests/unittest/disaggregated/test_remoteDictionary.py", 
+ "tests/unittest/disaggregated/test_router.py", + "tests/unittest/dump_checkpoint_stats.py", + "tests/unittest/executor/test_base_worker.py", + "tests/unittest/executor/test_rpc_proxy.py", + "tests/unittest/executor/test_rpc_worker.py", + "tests/unittest/executor/test_rpc.py", + "tests/unittest/gc_utils.py", + "tests/unittest/llmapi/__init__.py", + "tests/unittest/llmapi/_run_mpi_comm_task.py", + "tests/unittest/llmapi/_run_multi_llm_tasks.py", + "tests/unittest/llmapi/_run_multi_mpi_comm_tasks.py", + "tests/unittest/llmapi/apps/__init__.py", + "tests/unittest/llmapi/apps/_test_disagg_serving_multi_nodes.py", + "tests/unittest/llmapi/apps/_test_llm_chat.py", + "tests/unittest/llmapi/apps/_test_llm_server.py", + "tests/unittest/llmapi/apps/_test_openai_cache_salt.py", + "tests/unittest/llmapi/apps/_test_openai_chat_guided_decoding.py", + "tests/unittest/llmapi/apps/_test_openai_chat_harmony.py", + "tests/unittest/llmapi/apps/_test_openai_chat_multimodal.py", + "tests/unittest/llmapi/apps/_test_openai_chat.py", + "tests/unittest/llmapi/apps/_test_openai_completions.py", + "tests/unittest/llmapi/apps/_test_openai_consistent_chat.py", + "tests/unittest/llmapi/apps/_test_openai_lora.py", + "tests/unittest/llmapi/apps/_test_openai_metrics.py", + "tests/unittest/llmapi/apps/_test_openai_misc.py", + "tests/unittest/llmapi/apps/_test_openai_mmencoder.py", + "tests/unittest/llmapi/apps/_test_openai_multi_chat.py", + "tests/unittest/llmapi/apps/_test_openai_multi_gpu.py", + "tests/unittest/llmapi/apps/_test_openai_multi_nodes.py", + "tests/unittest/llmapi/apps/_test_openai_perf_metrics.py", + "tests/unittest/llmapi/apps/_test_openai_prometheus.py", + "tests/unittest/llmapi/apps/_test_openai_reasoning.py", + "tests/unittest/llmapi/apps/_test_openai_responses.py", + "tests/unittest/llmapi/apps/_test_openai_tool_call.py", + "tests/unittest/llmapi/apps/_test_trtllm_serve_benchmark.py", + "tests/unittest/llmapi/apps/_test_trtllm_serve_duplicated_args.py", + 
"tests/unittest/llmapi/apps/_test_trtllm_serve_example.py", + "tests/unittest/llmapi/apps/_test_trtllm_serve_lora.py", + "tests/unittest/llmapi/apps/_test_trtllm_serve_multimodal_benchmark.py", + "tests/unittest/llmapi/apps/_test_trtllm_serve_multimodal_example.py", + "tests/unittest/llmapi/apps/_test_trtllm_serve_top_logprobs.py", + "tests/unittest/llmapi/apps/openai_server.py", + "tests/unittest/llmapi/apps/test_tool_parsers.py", + "tests/unittest/llmapi/apps/utils.py", + "tests/unittest/llmapi/lora_test_utils.py", + "tests/unittest/llmapi/run_llm_exit.py", + "tests/unittest/llmapi/run_llm_with_postproc.py", + "tests/unittest/llmapi/run_llm.py", + "tests/unittest/llmapi/test_additional_model_outputs.py", + "tests/unittest/llmapi/test_build_cache.py", + "tests/unittest/llmapi/test_executor.py", + "tests/unittest/llmapi/test_gc_utils.py", + "tests/unittest/llmapi/test_llm_args.py", + "tests/unittest/llmapi/test_llm_download.py", + "tests/unittest/llmapi/test_llm_kv_cache_events.py", + "tests/unittest/llmapi/test_llm_models.py", + "tests/unittest/llmapi/test_llm_multi_gpu_pytorch.py", + "tests/unittest/llmapi/test_llm_multi_gpu.py", + "tests/unittest/llmapi/test_llm_pytorch.py", + "tests/unittest/llmapi/test_llm_quant.py", + "tests/unittest/llmapi/test_llm_utils.py", + "tests/unittest/llmapi/test_llm.py", + "tests/unittest/llmapi/test_memory_profiling.py", + "tests/unittest/llmapi/test_mpi_session.py", + "tests/unittest/llmapi/test_reasoning_parser.py", + "tests/unittest/llmapi/test_serialization.py", + "tests/unittest/llmapi/test_utils.py", + "tests/unittest/others/__init__.py", + "tests/unittest/others/test_builder.py", + "tests/unittest/others/test_convert_spec_decoding_mask_to_packed_mask.py", + "tests/unittest/others/test_debugging_api.py", + "tests/unittest/others/test_exception.py", + "tests/unittest/others/test_export.py", + "tests/unittest/others/test_graph_rewriter.py", + "tests/unittest/others/test_kv_cache_manager.py", + 
"tests/unittest/others/test_kv_cache_transceiver.py", + "tests/unittest/others/test_kv_cache_update.py", + "tests/unittest/others/test_layer.py", + "tests/unittest/others/test_leak.py", + "tests/unittest/others/test_mapping.py", + "tests/unittest/others/test_model_dtype.py", + "tests/unittest/others/test_module.py", + "tests/unittest/others/test_multimodal_registry.py", + "tests/unittest/others/test_plugins.py", + "tests/unittest/others/test_precision_control.py", + "tests/unittest/others/test_pretrained_config.py", + "tests/unittest/others/test_session.py", + "tests/unittest/others/test_time_breakdown.py", + "tests/unittest/profile_utils.py", + "tests/unittest/scaffolding/__init__.py", + "tests/unittest/scaffolding/test_bench.py", + "tests/unittest/scaffolding/test_parallel_process.py", + "tests/unittest/scaffolding/test_scaffolding.py", + "tests/unittest/scaffolding/test_task_collection.py", + "tests/unittest/scaffolding/test_worker.py", + "tests/unittest/test_model_runner_cpp.py", + "tests/unittest/test_pip_install.py", + "tests/unittest/tools/__init__.py", + "tests/unittest/tools/plugin_gen/__init__.py", + "tests/unittest/tools/plugin_gen/kernel_config.py", + "tests/unittest/tools/plugin_gen/test_core.py", + "tests/unittest/tools/plugin_gen/test_plugin_gen.py", + "tests/unittest/tools/plugin_gen/test_shape_infer.py", + "tests/unittest/tools/test_layer_wise_benchmarks.py", + "tests/unittest/tools/test_prepare_dataset.py", + "tests/unittest/tools/test_test_to_stage_mapping.py", + "tests/unittest/trt/__init__.py", + "tests/unittest/trt/attention/test_bert_attention.py", + "tests/unittest/trt/attention/test_gpt_attention_IFB.py", + "tests/unittest/trt/attention/test_gpt_attention_no_cache.py", + "tests/unittest/trt/attention/test_gpt_attention.py", + "tests/unittest/trt/attention/test_sage_attention.py", + "tests/unittest/trt/functional/__init__.py", + "tests/unittest/trt/functional/test_alibi.py", + "tests/unittest/trt/functional/test_allreduce_norm.py", + 
"tests/unittest/trt/functional/test_allreduce_prepost_residual_norm.py", + "tests/unittest/trt/functional/test_arange.py", + "tests/unittest/trt/functional/test_argmax.py", + "tests/unittest/trt/functional/test_assertion.py", + "tests/unittest/trt/functional/test_avg_pool2d.py", + "tests/unittest/trt/functional/test_cast.py", + "tests/unittest/trt/functional/test_conv2d.py", + "tests/unittest/trt/functional/test_conv3d.py", + "tests/unittest/trt/functional/test_cos.py", + "tests/unittest/trt/functional/test_cumsum.py", + "tests/unittest/trt/functional/test_dora.py", + "tests/unittest/trt/functional/test_einsum.py", + "tests/unittest/trt/functional/test_embedding_single_gpu.py", + "tests/unittest/trt/functional/test_exp.py", + "tests/unittest/trt/functional/test_expand.py", + "tests/unittest/trt/functional/test_flatten.py", + "tests/unittest/trt/functional/test_flip.py", + "tests/unittest/trt/functional/test_fp4_gemm_ootb.py", + "tests/unittest/trt/functional/test_fp4_gemm.py", + "tests/unittest/trt/functional/test_gather_nd.py", + "tests/unittest/trt/functional/test_gather.py", + "tests/unittest/trt/functional/test_geglu.py", + "tests/unittest/trt/functional/test_gelu.py", + "tests/unittest/trt/functional/test_gemm_swiglu.py", + "tests/unittest/trt/functional/test_group_norm.py", + "tests/unittest/trt/functional/test_identity.py", + "tests/unittest/trt/functional/test_index_select.py", + "tests/unittest/trt/functional/test_interpolate.py", + "tests/unittest/trt/functional/test_logsoftmax.py", + "tests/unittest/trt/functional/test_lora.py", + "tests/unittest/trt/functional/test_low_latency_gemm.py", + "tests/unittest/trt/functional/test_mamba_conv1d.py", + "tests/unittest/trt/functional/test_masked_scatter.py", + "tests/unittest/trt/functional/test_masked_select.py", + "tests/unittest/trt/functional/test_matmul.py", + "tests/unittest/trt/functional/test_meshgrid2d.py", + "tests/unittest/trt/functional/test_moe.py", + "tests/unittest/trt/functional/test_nccl.py", + 
"tests/unittest/trt/functional/test_nonzero.py", + "tests/unittest/trt/functional/test_outer.py", + "tests/unittest/trt/functional/test_pad.py", + "tests/unittest/trt/functional/test_permute.py", + "tests/unittest/trt/functional/test_pp_reduce_scatter.py", + "tests/unittest/trt/functional/test_quant.py", + "tests/unittest/trt/functional/test_rearrange.py", + "tests/unittest/trt/functional/test_repeat_interleave.py", + "tests/unittest/trt/functional/test_repeat.py", + "tests/unittest/trt/functional/test_rg_lru.py", + "tests/unittest/trt/functional/test_sample.py", + "tests/unittest/trt/functional/test_scatter_nd.py", + "tests/unittest/trt/functional/test_scatter.py", + "tests/unittest/trt/functional/test_select.py", + "tests/unittest/trt/functional/test_selective_scan.py", + "tests/unittest/trt/functional/test_sigmoid.py", + "tests/unittest/trt/functional/test_silu.py", + "tests/unittest/trt/functional/test_sin.py", + "tests/unittest/trt/functional/test_slice.py", + "tests/unittest/trt/functional/test_softplus.py", + "tests/unittest/trt/functional/test_split.py", + "tests/unittest/trt/functional/test_squeeze.py", + "tests/unittest/trt/functional/test_swiglu.py", + "tests/unittest/trt/functional/test_topk.py", + "tests/unittest/trt/functional/test_transpose.py", + "tests/unittest/trt/functional/test_unbind.py", + "tests/unittest/trt/functional/test_unsqueeze.py", + "tests/unittest/trt/functional/test_view.py", + "tests/unittest/trt/functional/test_where.py", + "tests/unittest/trt/model_api/profile_utils.py", + "tests/unittest/trt/model_api/test_model_api_multi_gpu.py", + "tests/unittest/trt/model_api/test_model_level_api.py", + "tests/unittest/trt/model_api/test_model_quantization.py", + "tests/unittest/trt/model/__init__.py", + "tests/unittest/trt/model/eagle/test_decode_draft_tokens_plugin.py", + "tests/unittest/trt/model/eagle/test_prepare_drafter_inputs_plugin.py", + "tests/unittest/trt/model/eagle/test_sample_accept_draft_tokens_plugin.py", + 
"tests/unittest/trt/model/redrafter/test_beams2tree.py", + "tests/unittest/trt/model/redrafter/test_draft_token_indices.py", + "tests/unittest/trt/model/redrafter/test_draft_token.py", + "tests/unittest/trt/model/redrafter/test_gather_beams.py", + "tests/unittest/trt/model/redrafter/test_mask.py", + "tests/unittest/trt/model/redrafter/test_packed_position_ids.py", + "tests/unittest/trt/model/redrafter/test_prefix_match_indices.py", + "tests/unittest/trt/model/redrafter/test_prepare_input.py", + "tests/unittest/trt/model/redrafter/test_process_logits.py", + "tests/unittest/trt/model/redrafter/test_top1.py", + "tests/unittest/trt/model/redrafter/test_unpack_gen_data.py", + "tests/unittest/trt/model/redrafter/test_validate.py", + "tests/unittest/trt/model/test_gpt_e2e.py", + "tests/unittest/trt/model/test_gpt.py", + "tests/unittest/trt/model/test_llama.py", + "tests/unittest/trt/model/test_mamba.py", + "tests/unittest/trt/model/test_mistral.py", + "tests/unittest/trt/model/test_nemotron_nas.py", + "tests/unittest/trt/model/test_phi.py", + "tests/unittest/trt/model/test_unet.py", + "tests/unittest/trt/python_plugin/plugin_wrapper_utils.py", + "tests/unittest/trt/python_plugin/test_plugin_wrapper.py", + "tests/unittest/trt/quantization/__init__.py", + "tests/unittest/trt/quantization/_utils.py", + "tests/unittest/trt/quantization/test_fp8_quantization.py", + "tests/unittest/trt/quantization/test_fp8_rowwise_gemm.py", + "tests/unittest/trt/quantization/test_functional.py", + "tests/unittest/trt/quantization/test_mode.py", + "tests/unittest/trt/quantization/test_moe_weight_only_quant_matmul.py", + "tests/unittest/trt/quantization/test_qserve_gemm.py", + "tests/unittest/trt/quantization/test_quant_layer.py", + "tests/unittest/trt/quantization/test_quant.py", + "tests/unittest/trt/quantization/test_smooth_quant_gemm.py", + "tests/unittest/trt/quantization/test_smooth_quant_layer_norm.py", + "tests/unittest/trt/quantization/test_smooth_quant_rms_norm.py", + 
"tests/unittest/trt/quantization/test_weight_only_groupwise_quant_matmul.py", + "tests/unittest/trt/quantization/test_weight_only_quant_matmul.py", + "tests/unittest/utils/__init__.py", + "tests/unittest/utils/cpp_paths.py", + "tests/unittest/utils/llm_data.py", + "tests/unittest/utils/runtime_defaults.py", + "tests/unittest/utils/test_medusa_utils.py", + "tests/unittest/utils/test_prebuilt_whl_cpp_extensions.py", + "tests/unittest/utils/test_util.py", + "tests/unittest/utils/torch_ref.py", + "tests/unittest/utils/util.py", + "triton_backend/all_models/disaggregated_serving/disaggregated_serving_bls/1/model.py", + "triton_backend/all_models/gpt/postprocessing/1/model.py", + "triton_backend/all_models/gpt/preprocessing/1/model.py", + "triton_backend/all_models/gpt/tensorrt_llm/1/model.py", + "triton_backend/all_models/inflight_batcher_llm/postprocessing/1/model.py", + "triton_backend/all_models/inflight_batcher_llm/preprocessing/1/model.py", + "triton_backend/all_models/inflight_batcher_llm/tensorrt_llm_bls/1/lib/decode.py", + "triton_backend/all_models/inflight_batcher_llm/tensorrt_llm_bls/1/lib/triton_decoder.py", + "triton_backend/all_models/inflight_batcher_llm/tensorrt_llm_bls/1/model.py", + "triton_backend/all_models/inflight_batcher_llm/tensorrt_llm/1/model.py", + "triton_backend/all_models/llmapi/tensorrt_llm/1/helpers.py", + "triton_backend/all_models/llmapi/tensorrt_llm/1/model.py", + "triton_backend/all_models/multimodal/multimodal_encoders/1/model.py", + "triton_backend/all_models/multimodal/multimodal_encoders/1/multimodal_utils.py", + "triton_backend/all_models/tests/test_decode.py", + "triton_backend/all_models/tests/test_llmapi_python_backend.py", + "triton_backend/all_models/tests/test_multi_image_preprocess.py", + "triton_backend/all_models/tests/test_multimodal_encoders.py", + "triton_backend/all_models/tests/test_python_backend.py", + "triton_backend/all_models/tests/test_triton_decoder.py", + 
"triton_backend/all_models/whisper/whisper_bls/1/fbank.py", + "triton_backend/all_models/whisper/whisper_bls/1/model.py", + "triton_backend/all_models/whisper/whisper_bls/1/tokenizer.py", + "triton_backend/ci/L0_backend_trtllm/base_metrics_verification_tests.py", + "triton_backend/ci/L0_backend_trtllm/custom_metrics_verification_tests.py", + "triton_backend/inflight_batcher_llm/client/__init__.py", + "triton_backend/inflight_batcher_llm/client/e2e_grpc_speculative_decoding_client.py", + "triton_backend/inflight_batcher_llm/client/end_to_end_grpc_client.py", + "triton_backend/inflight_batcher_llm/client/inflight_batcher_llm_client.py", + "triton_backend/scripts/launch_triton_server.py", + "triton_backend/tools/__init__.py", + "triton_backend/tools/fill_template.py", + "triton_backend/tools/gpt/benchmark_core_model.py", + "triton_backend/tools/gpt/client_async.py", + "triton_backend/tools/gpt/client.py", + "triton_backend/tools/gpt/end_to_end_test.py", + "triton_backend/tools/gpt/gen_input_data.py", + "triton_backend/tools/inflight_batcher_llm/benchmark_core_model.py", + "triton_backend/tools/inflight_batcher_llm/end_to_end_test.py", + "triton_backend/tools/inflight_batcher_llm/speculative_decoding_test.py", + "triton_backend/tools/inflight_batcher_llm/test_max_queue_size.py", + "triton_backend/tools/llmapi_client.py", + "triton_backend/tools/multimodal/client.py", + "triton_backend/tools/tests/__init__.py", + "triton_backend/tools/tests/test_fill_template.py", + "triton_backend/tools/tests/test_llmapi_cancel.py", + "triton_backend/tools/utils/__init__.py", + "triton_backend/tools/utils/utils.py", + "triton_backend/tools/whisper/client.py" ]