1 file changed, +3 −7 lines

vllm/model_executor/layers/quantization/utils

@@ -339,22 +339,18 @@ def flashinfer_trtllm_fp4_routed_moe(
     topk_ids: torch.Tensor,  # Packed
     top_k: int,
     global_num_experts: int,
-    num_expert_group: int | None,
-    topk_group: int | None,
-    custom_routing_function: object | None,
 ) -> torch.Tensor:
     """
-    Apply FlashInfer TensorRT-LLM FP4 MoE kernel.
+    Apply FlashInfer TensorRT-LLM FP4 MoE kernel. Uses packed
+    input top k expert indices and scores rather than computing
+    top k expert indices from scores.
 
     Args:
         layer: The MoE layer with weights and scales
         x: Input tensor
         topk_ids: Ids of selected experts
         top_k: Number of experts to select per token
         global_num_experts: Total number of experts across all ranks
-        num_expert_group: Number of expert groups (for grouped routing)
-        topk_group: Top-k within each group
-        custom_routing_function: Custom routing function (e.g., Llama4)
 
     Returns:
         Output tensor from the MoE layer
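With the grouped-routing parameters removed, the docstring implies the caller now selects experts up front and passes the resulting `topk_ids` in, rather than having the kernel derive them from router scores. The following is a minimal, hypothetical sketch of that caller-side step using plain `torch.topk`; the names (`router_logits`, tensor shapes) are illustrative assumptions and not part of the vllm API shown in this diff.

```python
import torch

# Illustrative shapes only; not taken from the vllm source.
num_tokens, global_num_experts, top_k = 4, 8, 2
router_logits = torch.randn(num_tokens, global_num_experts)

# Caller-side routing: normalize logits to scores, then take the
# top-k experts per token. These ids/scores would then be handed
# to a routed-MoE kernel instead of being computed inside it.
scores = torch.softmax(router_logits, dim=-1)
topk_scores, topk_ids = torch.topk(scores, k=top_k, dim=-1)

# One selected expert index per (token, slot), with its routing weight.
assert topk_ids.shape == (num_tokens, top_k)
assert topk_scores.shape == (num_tokens, top_k)
```

Precomputing the ids this way keeps the kernel's interface minimal, which matches the PR's removal of `num_expert_group`, `topk_group`, and `custom_routing_function`: any grouped or custom routing policy can run outside the kernel, so long as it produces the same packed `topk_ids`.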