Skip to content

Commit e519281

Browse files
authored
[Metrics] Add test for multi-modal cache stats logging (#26588)
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
1 parent 7b03584 commit e519281

File tree

3 files changed

+30
-4
lines changed

3 files changed

+30
-4
lines changed

tests/entrypoints/llm/test_mm_cache_stats.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33

4+
import logging
5+
46
import pytest
7+
import regex as re
58

69
from vllm import LLM
710
from vllm.entrypoints.chat_utils import ChatCompletionMessageParam
11+
from vllm.v1.metrics import loggers as stat_loggers
812
from vllm.v1.metrics.reader import Counter, Metric
913

1014
from ..openai.test_vision import TEST_IMAGE_ASSETS
@@ -37,12 +41,27 @@ def _get_mm_cache_stats(metrics: list[Metric]):
3741
return mm_cache_queries, mm_cache_hits
3842

3943

44+
def _get_mm_cache_log(llm: LLM, caplog_vllm: pytest.LogCaptureFixture) -> float:
    """Force one stats-logging pass and return the logged MM cache hit rate.

    Previously captured records are cleared first, then ``do_log_stats()``
    is invoked on the engine while INFO-level records from the stat loggers
    module are captured. The percentage is parsed out of the single record
    that pass is expected to produce.

    Args:
        llm: The ``LLM`` instance whose engine should emit its stats log.
        caplog_vllm: Log-capture fixture used to collect the emitted record.

    Returns:
        The number printed after "MM cache hit rate:" in the log message,
        as a float (a percentage, without the trailing ``%``).
    """
    # Drop anything captured by earlier calls so only this pass is inspected.
    caplog_vllm.clear()

    logger_name = stat_loggers.__name__
    with caplog_vllm.at_level(logging.INFO, logger=logger_name):
        llm.llm_engine.do_log_stats()

    # One stats pass must yield exactly one log record.
    captured = caplog_vllm.records
    assert len(captured) == 1
    message = captured[0].getMessage()

    assert "MM cache hit rate" in message
    found = re.search(r"MM cache hit rate: ([0-9.]+)%", message)
    assert found is not None
    return float(found.group(1))
56+
57+
4058
@pytest.mark.parametrize("image_urls", [TEST_IMAGE_ASSETS[:2]], indirect=True)
4159
@pytest.mark.parametrize("mm_processor_cache_type", ["lru", "shm"])
4260
def test_mm_cache_stats(
4361
num_gpus_available,
4462
image_urls,
4563
mm_processor_cache_type,
64+
caplog_vllm,
4665
):
4766
llm = LLM(
4867
model="llava-hf/llava-1.5-7b-hf",
@@ -56,19 +75,24 @@ def test_mm_cache_stats(
5675

5776
llm.chat(_make_messages(image_urls[0]))
5877
assert _get_mm_cache_stats(llm.get_metrics()) == (1, 0)
78+
assert _get_mm_cache_log(llm, caplog_vllm) == pytest.approx(0.0)
5979

6080
llm.chat(_make_messages(image_urls[1]))
6181
assert _get_mm_cache_stats(llm.get_metrics()) == (2, 0)
82+
assert _get_mm_cache_log(llm, caplog_vllm) == pytest.approx(0.0)
6283

6384
llm.chat(_make_messages(image_urls[0]))
6485
assert _get_mm_cache_stats(llm.get_metrics()) == (3, 1)
86+
assert _get_mm_cache_log(llm, caplog_vllm) == pytest.approx(33.3)
6587

6688
# NOTE: This only resets hit rate stats in CachingMetrics
6789
# The raw queries and hits counts remain unaffected
6890
llm.reset_mm_cache()
6991

7092
llm.chat(_make_messages(image_urls[0]))
7193
assert _get_mm_cache_stats(llm.get_metrics()) == (4, 1)
94+
assert _get_mm_cache_log(llm, caplog_vllm) == pytest.approx(0.0)
7295

7396
llm.chat(_make_messages(image_urls[1]))
7497
assert _get_mm_cache_stats(llm.get_metrics()) == (5, 1)
98+
assert _get_mm_cache_log(llm, caplog_vllm) == pytest.approx(0.0)

vllm/v1/metrics/loggers.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@ def __init__(self, vllm_config: VllmConfig, engine_index: int = 0):
6060
self._reset(time.monotonic())
6161

6262
self.last_scheduler_stats = SchedulerStats()
63-
self.last_mm_cache_stats: Optional[MultiModalCacheStats] = None
6463

6564
# Caching metrics. This cannot be reset.
6665
# TODO: Make the interval configurable.
@@ -115,8 +114,6 @@ def record(
115114
if mm_cache_stats:
116115
self.mm_caching_metrics.observe(mm_cache_stats)
117116

118-
self.last_mm_cache_stats = mm_cache_stats
119-
120117
def log(self):
121118
now = time.monotonic()
122119
prompt_throughput = self._get_throughput(self.num_prompt_tokens, now)
@@ -157,7 +154,7 @@ def log(self):
157154
scheduler_stats.kv_cache_usage * 100,
158155
self.prefix_caching_metrics.hit_rate * 100,
159156
]
160-
if self.last_mm_cache_stats:
157+
if not self.mm_caching_metrics.empty:
161158
log_parts.append("MM cache hit rate: %.1f%%")
162159
log_args.append(self.mm_caching_metrics.hit_rate * 100)
163160

vllm/v1/metrics/stats.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,11 @@ def reset(self):
9696
self.aggregated_query_hit = 0
9797
self.query_queue.clear()
9898

99+
@property
def empty(self) -> bool:
    """Tell whether nothing has been observed yet.

    Returns:
        True when ``aggregated_requests`` equals zero, False otherwise.
    """
    observed_requests = self.aggregated_requests
    return observed_requests == 0
103+
99104
@property
100105
def hit_rate(self) -> float:
101106
"""Calculate the hit rate for the past N requests."""

0 commit comments

Comments
 (0)