We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent f79b5b0 commit b39aa3b — Copy full SHA for b39aa3b
vllm/v1/attention/backends/mla/common.py
@@ -2043,16 +2043,10 @@ def forward(
2043
)
2044
decode_q0[..., : ql_nope_shape[2]].copy_(decode_ql_nope)
2045
decode_q0[..., ql_nope_shape[2] :].copy_(decode_q_pe)
2046
- decode_q = torch.empty(
2047
- decode_q_shape,
2048
- device=decode_ql_nope.device,
2049
- dtype=torch.float8_e4m3fn,
2050
- )
2051
2052
decode_q, _ = ops.scaled_fp8_quant(
2053
decode_q0.view(decode_q_shape[0], -1),
2054
layer._q_scale,
2055
- output=decode_q.view(decode_q_shape[0], -1),
2056
2057
decode_q = decode_q.view(decode_q_shape)
2058
else:
0 commit comments