2 files changed: +6 -1 lines changed.
Columns: original file line number | diff line number | diff line change
@@ -127,6 +127,7 @@ def get_computed_blocks(
127127 self .req_to_block_hashes [request .request_id ] = block_hashes
128128
129129 if self .log_stats :
130+ assert self .prefix_cache_stats is not None
130131 self .prefix_cache_stats .requests += 1
131132 # When the request requires prompt logprobs, we skip prefix caching.
132133 if request .sampling_params .prompt_logprobs is not None :
@@ -148,6 +149,7 @@ def get_computed_blocks(
148149 computed_blocks = (
149150 self .specialized_manager .find_longest_cache_hit (block_hashes ))
150151 if self .log_stats :
152+ assert self .prefix_cache_stats is not None
151153 self .prefix_cache_stats .queries += len (block_hashes )
152154 self .prefix_cache_stats .hits += len (computed_blocks )
153155
@@ -321,6 +323,7 @@ def reset_prefix_cache(self) -> bool:
321323 if not self .block_pool .reset_prefix_cache ():
322324 return False
323325 if self .log_stats :
326+ assert self .prefix_cache_stats is not None
324327 self .prefix_cache_stats .reset = True
325328 return True
326329
Columns: original file line number | diff line number | diff line change
@@ -747,11 +747,13 @@ def make_stats(
747747 ) -> Optional [SchedulerStats ]:
748748 if not self .log_stats :
749749 return None
750+ prefix_cache_stats = self .kv_cache_manager .make_prefix_cache_stats ()
751+ assert prefix_cache_stats is not None
750752 return SchedulerStats (
751753 num_running_reqs = len (self .running ),
752754 num_waiting_reqs = len (self .waiting ),
753755 gpu_cache_usage = self .kv_cache_manager .usage ,
754- prefix_cache_stats = self . kv_cache_manager . make_prefix_cache_stats () ,
756+ prefix_cache_stats = prefix_cache_stats ,
755757 spec_decoding_stats = spec_decoding_stats ,
756758 )
757759
You can’t perform that action at this time.
0 commit comments