2828from vllm .v1 .core .sched .utils import check_stop , remove_all
2929from vllm .v1 .engine import EngineCoreEventType , EngineCoreOutput , EngineCoreOutputs
3030from vllm .v1 .kv_cache_interface import KVCacheConfig
31- from vllm .v1 .metrics .stats import SchedulerStats
31+ from vllm .v1 .metrics .stats import PrefixCacheStats , SchedulerStats
3232from vllm .v1 .outputs import DraftTokenIds , KVConnectorOutput , ModelRunnerOutput
3333from vllm .v1 .request import Request , RequestStatus
3434from vllm .v1 .spec_decode .metrics import SpecDecodingStats
@@ -84,6 +84,7 @@ def __init__(
8484 # will have a corresponding KVConnector with Role=WORKER.
8585 # KV Connector pushes/pull of remote KVs for P/D and offloading.
8686 self .connector = None
87+ self .connector_prefix_cache_stats : PrefixCacheStats | None = None
8788 if self .vllm_config .kv_transfer_config is not None :
8889 assert len (self .kv_cache_config .kv_cache_groups ) == 1 , (
8990 "Multiple KV cache groups are not currently supported "
@@ -95,6 +96,8 @@ def __init__(
9596 self .connector = KVConnectorFactory .create_connector (
9697 config = self .vllm_config , role = KVConnectorRole .SCHEDULER
9798 )
99+ if self .log_stats :
100+ self .connector_prefix_cache_stats = PrefixCacheStats ()
98101
99102 self .kv_event_publisher = EventPublisherFactory .create (
100103 self .kv_events_config ,
@@ -526,6 +529,9 @@ def schedule(self) -> SchedulerOutput:
526529 new_computed_blocks + new_blocks ,
527530 num_external_computed_tokens ,
528531 )
532+ self ._update_connector_prefix_cache_stats (
533+ request , num_external_computed_tokens
534+ )
529535
530536 # Request was already popped from self.waiting
531537 # unless it was re-added above due to new_blocks being None.
@@ -1247,11 +1253,13 @@ def make_stats(
12471253 return None
12481254 prefix_cache_stats = self .kv_cache_manager .make_prefix_cache_stats ()
12491255 assert prefix_cache_stats is not None
1256+ connector_prefix_cache_stats = self ._make_connector_prefix_cache_stats ()
12501257 return SchedulerStats (
12511258 num_running_reqs = len (self .running ),
12521259 num_waiting_reqs = len (self .waiting ),
12531260 kv_cache_usage = self .kv_cache_manager .usage ,
12541261 prefix_cache_stats = prefix_cache_stats ,
1262+ connector_prefix_cache_stats = connector_prefix_cache_stats ,
12551263 spec_decoding_stats = spec_decoding_stats ,
12561264 num_corrupted_reqs = sum (req .is_output_corrupted for req in self .running ),
12571265 kv_connector_stats = kv_connector_stats .data if kv_connector_stats else None ,
@@ -1282,6 +1290,25 @@ def shutdown(self) -> None:
12821290 # KV Connector Related Methods
12831291 ########################################################################
12841292
def _update_connector_prefix_cache_stats(
    self, request: Request, num_external_tokens: int
) -> None:
    """Record one request's KV-connector lookup in the connector stats.

    Does nothing when ``self.connector_prefix_cache_stats`` is ``None``
    (no collector has been set up for the connector).

    Args:
        request: The request being scheduled; its ``num_tokens`` is
            recorded as the number of tokens queried and its
            ``num_preemptions`` decides the ``preempted`` flag.
        num_external_tokens: Number of tokens reported as already computed
            externally for this request; recorded as the number of hits.
    """
    stats = self.connector_prefix_cache_stats
    if stats is None:
        return

    stats.record(
        num_tokens=request.num_tokens,
        num_hits=num_external_tokens,
        # A request that was preempted at least once is flagged so the
        # collector can tell it apart from a first-time lookup.
        preempted=request.num_preemptions > 0,
    )
1304+
def _make_connector_prefix_cache_stats(self) -> PrefixCacheStats | None:
    """Hand out the accumulated connector prefix-cache stats and reset them.

    Returns:
        ``None`` when ``self.connector_prefix_cache_stats`` is ``None``;
        otherwise the stats object accumulated so far. In that case the
        attribute is replaced by a fresh ``PrefixCacheStats()`` so that
        later records go into a new object rather than the returned one.
    """
    stats = self.connector_prefix_cache_stats
    if stats is None:
        return None
    # Swap in a new collector before returning so the caller owns the old
    # object exclusively.
    self.connector_prefix_cache_stats = PrefixCacheStats()
    return stats
1311+
def get_kv_connector(self) -> KVConnectorBase_V1 | None:
    """Return the scheduler's KV connector, or ``None`` if none was created."""
    return self.connector
12871314
0 commit comments