1 file changed: +7 -4 lines changed
Original file line number | Diff line number | Diff line change
@@ -415,9 +415,9 @@ def schedule(self) -> SchedulerOutput:
415415 global_cache_hit_threshold )
416416
417417 # Check if cache hit is above threshold
418- prompt_len = len ( request . prompt_token_ids )
419- cache_hit_percent = num_computed_tokens / prompt_len \
420- if prompt_len > 0 else 0.0
418+ cache_hit_percent = \
419+ num_computed_tokens / request . num_prompt_tokens \
420+ if request . num_prompt_tokens > 0 else 0.0
421421 if cache_hit_percent < cache_hit_threshold :
422422 threshold_source = ("request"
423423 if request .cache_hit_threshold
@@ -1028,8 +1028,11 @@ def update_from_output(
10281028 # Handle requests that were rejected due to low cache hit rate.
10291029 if self .cache_hit_below_threshold_request_ids :
10301030 for req_id in self .cache_hit_below_threshold_request_ids :
1031+ req = self .requests .get (req_id )
1032+ if req is None :
1033+ # The request is already finished, e.g. aborted.
1034+ continue
10311035 # Add EngineCoreOutput for this Request.
1032- req = self .requests [req_id ]
10331036 req .status = RequestStatus .FINISHED_CACHE_HIT_BELOW_THRESHOLD
10341037 outputs [req .client_index ].append (
10351038 EngineCoreOutput (
You can’t perform that action at this time.
0 commit comments