We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 1ae7228 commit 651b9deCopy full SHA for 651b9de
vllm/v1/worker/gpu_model_runner.py
@@ -3530,7 +3530,6 @@ def _dummy_run(
3530
self.query_start_loc.np[1 : num_reqs + 1] = cum_num_tokens
3531
self.query_start_loc.copy_to_gpu()
3532
3533
- # Build attention metadata using the unified method
3534
attn_metadata, _ = self._build_attention_metadata(
3535
total_num_scheduled_tokens=num_tokens,
3536
max_num_scheduled_tokens=max_query_len,
0 commit comments