[Perf][Easy] Early stop in request_block_hasher (vllm-project#26112)

Jialin · alhridoy · commit ce4e43f3d564 · 2025-10-23T21:32:24.000-06:00
Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com>
diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py
@@ -588,6 +588,10 @@ def request_block_hasher(request: Request) -> list[BlockHash]:
         start_token_idx = len(request.block_hashes) * block_size
         num_tokens = request.num_tokens
 
+        if start_token_idx + block_size > num_tokens:
+            # Early stop when there are no new full blocks to hash.
+            return []
+
         curr_mm_idx = 0
         if start_token_idx > 0:
             # Set curr_mm_idx = -1 to indicate the last mm input.