Skip to content

Commit 8e8ea29

Browse files
committed
Refactor prefix cache stats tracking
Signed-off-by: tovam <tovam@pliops.com>
1 parent 6fd5cfc commit 8e8ea29

File tree

2 files changed

+24
-38
lines changed

2 files changed

+24
-38
lines changed

vllm/distributed/kv_transfer/kv_connector/v1/base.py

Lines changed: 0 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,6 @@
4444

4545
from vllm.logger import init_logger
4646
from vllm.v1.core.sched.output import SchedulerOutput
47-
from vllm.v1.metrics.stats import PrefixCacheStats
4847
from vllm.v1.outputs import KVConnectorOutput
4948

5049
if TYPE_CHECKING:
@@ -101,8 +100,6 @@ def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole):
101100
else:
102101
raise ValueError("kv_transfer_config must be set for KVConnectorBase_V1")
103102
self._role = role
104-
# FIXME: make prefix cache stats conditional on log_stats
105-
self.prefix_cache_stats = PrefixCacheStats()
106103

107104
@property
108105
def role(self) -> KVConnectorRole:
@@ -434,28 +431,3 @@ def build_kv_connector_stats(
434431
which can implement custom aggregation logic on the data dict.
435432
"""
436433
return None
437-
438-
def update_prefix_cache_stats(
439-
self, request_num_tokens: int, num_external_tokens: int
440-
) -> None:
441-
"""
442-
Update prefix cache statistics for a request.
443-
444-
Args:
445-
request_num_tokens (int): the number of tokens in the request.
446-
num_external_tokens (int): the number of tokens that will be
447-
loaded from the external KV cache.
448-
"""
449-
self.prefix_cache_stats.requests += 1
450-
self.prefix_cache_stats.queries += request_num_tokens
451-
self.prefix_cache_stats.hits += num_external_tokens
452-
453-
def make_prefix_cache_stats(self) -> PrefixCacheStats | None:
454-
"""Get (and reset) the prefix cache stats.
455-
456-
Returns:
457-
The current prefix caching stats.
458-
"""
459-
stats = self.prefix_cache_stats
460-
self.prefix_cache_stats = PrefixCacheStats()
461-
return stats

vllm/v1/core/sched/scheduler.py

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
from vllm.v1.core.sched.utils import check_stop, remove_all
2929
from vllm.v1.engine import EngineCoreEventType, EngineCoreOutput, EngineCoreOutputs
3030
from vllm.v1.kv_cache_interface import KVCacheConfig
31-
from vllm.v1.metrics.stats import SchedulerStats
31+
from vllm.v1.metrics.stats import PrefixCacheStats, SchedulerStats
3232
from vllm.v1.outputs import DraftTokenIds, KVConnectorOutput, ModelRunnerOutput
3333
from vllm.v1.request import Request, RequestStatus
3434
from vllm.v1.spec_decode.metrics import SpecDecodingStats
@@ -84,6 +84,7 @@ def __init__(
8484
# will have a corresponding KVConnector with Role=WORKER.
8585
# KV Connector pushes/pull of remote KVs for P/D and offloading.
8686
self.connector = None
87+
self.connector_prefix_cache_stats: PrefixCacheStats | None = None
8788
if self.vllm_config.kv_transfer_config is not None:
8889
assert len(self.kv_cache_config.kv_cache_groups) == 1, (
8990
"Multiple KV cache groups are not currently supported "
@@ -95,6 +96,8 @@ def __init__(
9596
self.connector = KVConnectorFactory.create_connector(
9697
config=self.vllm_config, role=KVConnectorRole.SCHEDULER
9798
)
99+
if self.log_stats:
100+
self.connector_prefix_cache_stats = PrefixCacheStats()
98101

99102
self.kv_event_publisher = EventPublisherFactory.create(
100103
self.kv_events_config,
@@ -525,10 +528,9 @@ def schedule(self) -> SchedulerOutput:
525528
new_computed_blocks + new_blocks,
526529
num_external_computed_tokens,
527530
)
528-
if self.log_stats:
529-
self.connector.update_prefix_cache_stats(
530-
request.num_tokens, num_external_computed_tokens
531-
)
531+
self._update_connector_prefix_cache_stats(
532+
request.num_tokens, num_external_computed_tokens
533+
)
532534

533535
# Request was already popped from self.waiting
534536
# unless it was re-added above due to new_blocks being None.
@@ -1249,11 +1251,7 @@ def make_stats(
12491251
if not self.log_stats:
12501252
return None
12511253
prefix_cache_stats = self.kv_cache_manager.make_prefix_cache_stats()
1252-
connector_prefix_cache_stats = (
1253-
self.connector.make_prefix_cache_stats()
1254-
if self.connector is not None
1255-
else None
1256-
)
1254+
connector_prefix_cache_stats = self._make_connector_prefix_cache_stats()
12571255
assert prefix_cache_stats is not None
12581256
return SchedulerStats(
12591257
num_running_reqs=len(self.running),
@@ -1291,6 +1289,22 @@ def shutdown(self) -> None:
12911289
# KV Connector Related Methods
12921290
########################################################################
12931291

1292+
def _update_connector_prefix_cache_stats(
1293+
self, request_num_tokens: int, num_external_tokens: int
1294+
) -> None:
1295+
if self.connector_prefix_cache_stats is None:
1296+
return
1297+
self.connector_prefix_cache_stats.requests += 1
1298+
self.connector_prefix_cache_stats.queries += request_num_tokens
1299+
self.connector_prefix_cache_stats.hits += num_external_tokens
1300+
1301+
def _make_connector_prefix_cache_stats(self) -> PrefixCacheStats | None:
1302+
if self.connector_prefix_cache_stats is None:
1303+
return None
1304+
stats = self.connector_prefix_cache_stats
1305+
self.connector_prefix_cache_stats = PrefixCacheStats()
1306+
return stats
1307+
12941308
def get_kv_connector(self) -> KVConnectorBase_V1 | None:
12951309
return self.connector
12961310

0 commit comments

Comments
 (0)