|
1 | 1 | # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. |
2 | 2 | # SPDX-License-Identifier: Apache-2.0 |
3 | 3 |
|
4 | | -import time |
5 | 4 | from typing import Optional |
6 | 5 |
|
7 | 6 | import pytest |
|
24 | 23 | try: |
25 | 24 | from dynamo.llm import BlockManager |
26 | 25 | from dynamo.llm.vllm_integration.kv_cache_manager import KvbmCacheManager |
| 26 | + |
27 | 27 | KVBM_NOT_AVAILABLE = False |
28 | | -except: |
| 28 | +except ImportError: |
29 | 29 | KVBM_NOT_AVAILABLE = True |
30 | 30 |
|
31 | | -def new_kv_cache_manager( |
32 | | - num_blocks: int = 11, |
33 | | - page_size: int = 16 |
34 | | -): |
| 31 | + |
| 32 | +def new_kv_cache_manager(num_blocks: int = 11, page_size: int = 16): |
35 | 33 | """ |
36 | 34 | Creates a new KVBM cache manager. |
37 | 35 |
|
@@ -89,7 +87,10 @@ def make_kv_cache_config(block_size: int, num_blocks: int) -> KVCacheConfig: |
89 | 87 | ) |
90 | 88 |
|
91 | 89 |
|
92 | | -@pytest.mark.skipif(VLLM_NOT_AVAILABLE or KVBM_NOT_AVAILABLE, reason="VLLM not available or KVBM not available") |
| 90 | +@pytest.mark.skipif( |
| 91 | + VLLM_NOT_AVAILABLE or KVBM_NOT_AVAILABLE, |
| 92 | + reason="VLLM not available or KVBM not available", |
| 93 | +) |
93 | 94 | def test_prefill(): |
94 | 95 | """ |
95 | 96 | Tests the KvbmCacheManager's prefill functionality. |
@@ -282,7 +283,10 @@ def test_prefill_plp(): |
282 | 283 | manager.free(req2) |
283 | 284 |
|
284 | 285 |
|
285 | | -@pytest.mark.skipif(VLLM_NOT_AVAILABLE or KVBM_NOT_AVAILABLE, reason="VLLM not available or KVBM not available") |
| 286 | +@pytest.mark.skipif( |
| 287 | + VLLM_NOT_AVAILABLE or KVBM_NOT_AVAILABLE, |
| 288 | + reason="VLLM not available or KVBM not available", |
| 289 | +) |
286 | 290 | def test_decode(): |
287 | 291 | manager = new_kv_cache_manager() |
288 | 292 |
|
@@ -350,7 +354,10 @@ def test_decode(): |
350 | 354 | manager.free_block_hashes(req0) |
351 | 355 |
|
352 | 356 |
|
353 | | -@pytest.mark.skipif(VLLM_NOT_AVAILABLE or KVBM_NOT_AVAILABLE, reason="VLLM not available or KVBM not available") |
| 357 | +@pytest.mark.skipif( |
| 358 | + VLLM_NOT_AVAILABLE or KVBM_NOT_AVAILABLE, |
| 359 | + reason="VLLM not available or KVBM not available", |
| 360 | +) |
354 | 361 | def test_evict(): |
355 | 362 | manager = new_kv_cache_manager() |
356 | 363 | used_blocks = set() |
@@ -416,10 +423,13 @@ def test_evict(): |
416 | 423 | # assert manager.block_pool.free_block_queue.num_free_blocks == 7 |
417 | 424 |
|
418 | 425 |
|
419 | | -@pytest.mark.skipif(VLLM_NOT_AVAILABLE or KVBM_NOT_AVAILABLE, reason="VLLM not available or KVBM not available") |
| 426 | +@pytest.mark.skipif( |
| 427 | + VLLM_NOT_AVAILABLE or KVBM_NOT_AVAILABLE, |
| 428 | + reason="VLLM not available or KVBM not available", |
| 429 | +) |
420 | 430 | def test_hash_block_correct_reuse(): |
421 | 431 | """ |
422 | | - This tests when a previously cached block is reused as a new block, |
| 432 | + This tests when a previously cached block is reused as a new block, |
423 | 433 | its hash metadata should be correctly reset. |
424 | 434 | """ |
425 | 435 | block_size = 16 |
@@ -467,7 +477,10 @@ def test_hash_block_correct_reuse(): |
467 | 477 | assert blocks.blocks[1].block_hash is None |
468 | 478 |
|
469 | 479 |
|
470 | | -@pytest.mark.skipif(VLLM_NOT_AVAILABLE or KVBM_NOT_AVAILABLE, reason="VLLM not available or KVBM not available") |
| 480 | +@pytest.mark.skipif( |
| 481 | + VLLM_NOT_AVAILABLE or KVBM_NOT_AVAILABLE, |
| 482 | + reason="VLLM not available or KVBM not available", |
| 483 | +) |
471 | 484 | def test_computed_blocks_not_evicted(): |
472 | 485 | """ |
473 | 486 | Test that the computed blocks are not evicted when getting new blocks |
@@ -564,7 +577,10 @@ def _test_mm_prefix_caching(): |
564 | 577 | pass |
565 | 578 |
|
566 | 579 |
|
567 | | -@pytest.mark.skipif(VLLM_NOT_AVAILABLE or KVBM_NOT_AVAILABLE, reason="VLLM not available or KVBM not available") |
| 580 | +@pytest.mark.skipif( |
| 581 | + VLLM_NOT_AVAILABLE or KVBM_NOT_AVAILABLE, |
| 582 | + reason="VLLM not available or KVBM not available", |
| 583 | +) |
568 | 584 | def test_cache_key_salting(): |
569 | 585 | """ |
570 | 586 | This tests that cache salts are applied during hashing and the cache |
@@ -635,7 +651,10 @@ def test_cache_key_salting(): |
635 | 651 | """ |
636 | 652 |
|
637 | 653 |
|
638 | | -@pytest.mark.skipif(VLLM_NOT_AVAILABLE or KVBM_NOT_AVAILABLE, reason="VLLM not available or KVBM not available") |
| 654 | +@pytest.mark.skipif( |
| 655 | + VLLM_NOT_AVAILABLE or KVBM_NOT_AVAILABLE, |
| 656 | + reason="VLLM not available or KVBM not available", |
| 657 | +) |
639 | 658 | def test_prefill_not_enough_free_blocks_with_computed_blocks(): |
640 | 659 | """ |
641 | 660 | This is a unit test that tests the correctness of the allocate_slots |
@@ -758,6 +777,7 @@ def _test_eagle_with_sliding_window(): |
758 | 777 | Test Eagle behavior with sliding window.""" |
759 | 778 | pass |
760 | 779 |
|
| 780 | + |
761 | 781 | @pytest.mark.skipif(KVBM_NOT_AVAILABLE, reason="KVBM not available") |
762 | 782 | def test_kvbm_wrong_blocks_provided(): |
763 | 783 | """ |
|
0 commit comments