2828from vllm .v1 .core .sched .utils import check_stop , remove_all
2929from vllm .v1 .engine import EngineCoreEventType , EngineCoreOutput , EngineCoreOutputs
3030from vllm .v1 .kv_cache_interface import KVCacheConfig
31- from vllm .v1 .metrics .stats import SchedulerStats
31+ from vllm .v1 .metrics .stats import PrefixCacheStats , SchedulerStats
3232from vllm .v1 .outputs import DraftTokenIds , KVConnectorOutput , ModelRunnerOutput
3333from vllm .v1 .request import Request , RequestStatus
3434from vllm .v1 .spec_decode .metrics import SpecDecodingStats
@@ -84,6 +84,7 @@ def __init__(
8484 # will have a corresponding KVConnector with Role=WORKER.
8585 # KV Connector pushes/pull of remote KVs for P/D and offloading.
8686 self .connector = None
87+ self .connector_prefix_cache_stats : PrefixCacheStats | None = None
8788 if self .vllm_config .kv_transfer_config is not None :
8889 assert len (self .kv_cache_config .kv_cache_groups ) == 1 , (
8990 "Multiple KV cache groups are not currently supported "
@@ -95,6 +96,8 @@ def __init__(
9596 self .connector = KVConnectorFactory .create_connector (
9697 config = self .vllm_config , role = KVConnectorRole .SCHEDULER
9798 )
99+ if self .log_stats :
100+ self .connector_prefix_cache_stats = PrefixCacheStats ()
98101
99102 self .kv_event_publisher = EventPublisherFactory .create (
100103 self .kv_events_config ,
@@ -525,10 +528,9 @@ def schedule(self) -> SchedulerOutput:
525528 new_computed_blocks + new_blocks ,
526529 num_external_computed_tokens ,
527530 )
528- if self .log_stats :
529- self .connector .update_prefix_cache_stats (
530- request .num_tokens , num_external_computed_tokens
531- )
531+ self ._update_connector_prefix_cache_stats (
532+ request .num_tokens , num_external_computed_tokens
533+ )
532534
533535 # Request was already popped from self.waiting
534536 # unless it was re-added above due to new_blocks being None.
@@ -1249,11 +1251,7 @@ def make_stats(
12491251 if not self .log_stats :
12501252 return None
12511253 prefix_cache_stats = self .kv_cache_manager .make_prefix_cache_stats ()
1252- connector_prefix_cache_stats = (
1253- self .connector .make_prefix_cache_stats ()
1254- if self .connector is not None
1255- else None
1256- )
1254+ connector_prefix_cache_stats = self ._make_connector_prefix_cache_stats ()
12571255 assert prefix_cache_stats is not None
12581256 return SchedulerStats (
12591257 num_running_reqs = len (self .running ),
@@ -1291,6 +1289,22 @@ def shutdown(self) -> None:
12911289 # KV Connector Related Methods
12921290 ########################################################################
12931291
1292+ def _update_connector_prefix_cache_stats (
1293+ self , request_num_tokens : int , num_external_tokens : int
1294+ ) -> None :
1295+ if self .connector_prefix_cache_stats is None :
1296+ return
1297+ self .connector_prefix_cache_stats .requests += 1
1298+ self .connector_prefix_cache_stats .queries += request_num_tokens
1299+ self .connector_prefix_cache_stats .hits += num_external_tokens
1300+
def _make_connector_prefix_cache_stats(self) -> PrefixCacheStats | None:
    """Hand over the accumulated connector prefix-cache stats and reset.

    Returns:
        ``None`` when ``self.connector_prefix_cache_stats`` is ``None``.
        Otherwise the stats object accumulated so far; a fresh
        ``PrefixCacheStats()`` is installed in its place before returning.
    """
    snapshot = self.connector_prefix_cache_stats
    if snapshot is not None:
        # Swap in a new accumulator; the caller receives the old one.
        self.connector_prefix_cache_stats = PrefixCacheStats()
    return snapshot
1307+
def get_kv_connector(self) -> KVConnectorBase_V1 | None:
    """Return the value held in ``self.connector`` (may be ``None``)."""
    kv_connector = self.connector
    return kv_connector
12961310
0 commit comments