Skip to content

Commit 0400566

Browse files
Author: Kfir Wolfson (committed)
Commit message: fix after local CR
Signed-off-by: Kfir Wolfson <kfirw@pliops.com>
1 parent e803552 commit 0400566

File tree

1 file changed

+7 -4 lines changed

1 file changed

+7 -4 lines changed

vllm/v1/core/sched/scheduler.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -415,9 +415,9 @@ def schedule(self) -> SchedulerOutput:
 415  415      global_cache_hit_threshold)
 416  416
 417  417      # Check if cache hit is above threshold
 418       -   prompt_len = len(request.prompt_token_ids)
 419       -   cache_hit_percent = num_computed_tokens / prompt_len \
 420       -       if prompt_len > 0 else 0.0
      418  +   cache_hit_percent = \
      419  +       num_computed_tokens / request.num_prompt_tokens \
      420  +       if request.num_prompt_tokens > 0 else 0.0
 421  421      if cache_hit_percent < cache_hit_threshold:
 422  422          threshold_source = ("request"
 423  423                              if request.cache_hit_threshold
@@ -1028,8 +1028,11 @@ def update_from_output(
1028 1028      # Handle requests that were rejected due to low cache hit rate.
1029 1029      if self.cache_hit_below_threshold_request_ids:
1030 1030          for req_id in self.cache_hit_below_threshold_request_ids:
     1031  +           req = self.requests.get(req_id)
     1032  +           if req is None:
     1033  +               # The request is already finished, e.g. aborted.
     1034  +               continue
1031 1035              # Add EngineCoreOutput for this Request.
1032       -           req = self.requests[req_id]
1033 1036              req.status = RequestStatus.FINISHED_CACHE_HIT_BELOW_THRESHOLD
1034 1037              outputs[req.client_index].append(
1035 1038                  EngineCoreOutput(

0 commit comments

Comments (0)