We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent a30261f, commit 75e2563 (Copy full SHA for 75e2563)
vllm_ascend/worker/model_runner_v1.py
@@ -4226,8 +4226,9 @@ def _get_prompt_logprobs_dict(
4226
else:
4227
# This is the last chunk of prompt tokens to return.
4228
num_logits = num_remaining_tokens
4229
- completed_prefill_reqs.append(req_id)
4230
- prompt_logprobs_dict[req_id] = logprobs_tensors
+ if num_logits > 0:
+ completed_prefill_reqs.append(req_id)
4231
+ prompt_logprobs_dict[req_id] = logprobs_tensors
4232
4233
if num_logits <= 0:
4234
# This can happen for the final chunk if we prefilled exactly
0 commit comments