[https://nvbugs/5637012][fix] Fix helix unit tests

brb-nv · brb-nv · commit 20edf93331d4 · 2025-11-21T20:54:50.000Z
Signed-off-by: Balaram Buddharaju <169953907+brb-nv@users.noreply.github.com>
diff --git a/tensorrt_llm/_torch/modules/attention.py b/tensorrt_llm/_torch/modules/attention.py
@@ -1724,6 +1724,9 @@ def forward_absorption_generation(
             device=q.device,
         )
 
+        # Compute helix_position_offsets for helix parallelism.
+        helix_position_offsets = position_ids if self.mapping.cp_size > 1 else None
+
         rope_stream = self.aux_stream if not has_fp8_kv_cache else None
         if self.k_b_proj_trans.dtype == torch.bfloat16:
             # [num_heads, num_tokens, self.qk_nope_head_dim]
@@ -1737,10 +1740,18 @@ def forward_absorption_generation(
             maybe_execute_in_parallel(
                 lambda: torch.ops.trtllm.bmm_out(
                     q_nope_t, self.k_b_proj_trans.transpose(1, 2), q_nope_out),
-                lambda: self.mqa.mla_rope_generation(
-                    fused_q, q_pe, latent_cache, attn_metadata, cu_q_seqlens,
-                    cu_kv_seqlens, fmha_scheduler_counter, mla_bmm1_scale,
-                    mla_bmm2_scale, quant_q_buffer),
+                lambda: self.mqa.mla_rope_generation(fused_q,
+                                                     q_pe,
+                                                     latent_cache,
+                                                     attn_metadata,
+                                                     cu_q_seqlens,
+                                                     cu_kv_seqlens,
+                                                     fmha_scheduler_counter,
+                                                     mla_bmm1_scale,
+                                                     mla_bmm2_scale,
+                                                     quant_q_buffer,
+                                                     helix_position_offsets=
+                                                     helix_position_offsets),
                 self.ln_events[0],
                 self.ln_events[1],
                 rope_stream,
@@ -1758,10 +1769,18 @@ def forward_absorption_generation(
                     q_nope_out,
                     self.k_b_proj_trans_dequant,
                 ),
-                lambda: self.mqa.mla_rope_generation(
-                    fused_q, q_pe, latent_cache, attn_metadata, cu_q_seqlens,
-                    cu_kv_seqlens, fmha_scheduler_counter, mla_bmm1_scale,
-                    mla_bmm2_scale, quant_q_buffer),
+                lambda: self.mqa.mla_rope_generation(fused_q,
+                                                     q_pe,
+                                                     latent_cache,
+                                                     attn_metadata,
+                                                     cu_q_seqlens,
+                                                     cu_kv_seqlens,
+                                                     fmha_scheduler_counter,
+                                                     mla_bmm1_scale,
+                                                     mla_bmm2_scale,
+                                                     quant_q_buffer,
+                                                     helix_position_offsets=
+                                                     helix_position_offsets),
                 self.ln_events[0],
                 self.ln_events[1],
                 rope_stream,
diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
@@ -349,7 +349,6 @@ accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_fp8[throughput_laten
 test_e2e.py::test_openai_chat_multimodal_example SKIP (https://nvbugs/5636894)
 accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm_attention_dp] SKIP (https://nvbugs/5637220)
 llmapi/test_llm_examples.py::test_llmapi_example_multilora SKIP (https://nvbugs/5636857)
-unittest/_torch/modules/test_mla_helix.py::test_mla_helix_distributed SKIP (https://nvbugspro.nvidia.com/bug/5637012)
 accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_eagle3[cutlass] SKIP (https://nvbugs/5636916)
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp4-attn_backend=TRTLLM-torch_compile=False] SKIP (https://nvbugs/5616182)
 examples/test_phi.py::test_llm_phi_quantization_1gpu[Phi-3-small-128k-instruct-fp8-bfloat16] SKIP (https://nvbugs/5465143)