Skip to content

Commit

Permalink
[Core][Bugfix]: fix prefix caching for blockv2 (vllm-project#4764)
Browse files Browse the repository at this point in the history
Co-authored-by: Lei Wen <wenlei03@qiyi.com>
  • Loading branch information
leiwen83 and wenlei03 authored May 24, 2024
1 parent 9197709 commit e64fde4
Show file tree
Hide file tree
Showing 2 changed files with 141 additions and 17 deletions.
117 changes: 117 additions & 0 deletions tests/core/block/test_prefix_caching_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,123 @@ def test_get_common_computed_block_ids(num_blocks: int, block_size: int,

assert (len(res) == zero_point_blocks)

# Test case that assume those prompted block after first immutable would
# be freed into hashless allocator, while first immutable block get ref
# increased.
@staticmethod
@pytest.mark.parametrize("num_blocks", [3])
@pytest.mark.parametrize("block_size", [16])
@pytest.mark.parametrize("seed", list(range(10)))
def test_alloc_promotion(num_blocks: int, block_size: int, seed: int):
random.seed(seed)

allocator = PrefixCachingBlockAllocator(num_blocks=num_blocks,
block_size=block_size)
token_ids = list(range(block_size))

block = allocator.allocate_immutable(prev_block=None,
token_ids=token_ids)

assert allocator._refcounter.get(block.block_id) == 1
m = allocator.allocate_mutable(prev_block=None)

block_id = m.block_id
for i in range(block_size):
m.append_token_ids([i])
# After block get promoted to immutable from mutable, if there is
# already same content hash block, then it shall be released into
# hashless_allocator
# And first immutable block's ref get increased by 1
assert m.block_id == block.block_id
assert block_id in allocator._hashless_allocator._free_block_indices
assert allocator._refcounter.get(block.block_id) == 2

# Test case when eviction and allocation are mixed,
# make sure they work as expected
@staticmethod
@pytest.mark.parametrize("num_blocks", [3])
@pytest.mark.parametrize("block_size", [16])
@pytest.mark.parametrize("seed", list(range(10)))
def test_eviction_alloc_mixed(num_blocks: int, block_size: int, seed: int):
random.seed(seed)

all_blocks_list = [i for i in range(num_blocks)]
zero_ref = {i: 0 for i in range(num_blocks)}
allocator = PrefixCachingBlockAllocator(num_blocks=num_blocks,
block_size=block_size)
token_ids = list(range(num_blocks * block_size))

# now we have num_blocks free blocks in hashless allocator
# with internal tracking list _blocks _cached_blocks and evictor
# empty and block's ref shall be 0
assert list(allocator._hashless_allocator._free_block_indices
) == all_blocks_list
assert len(allocator._blocks.keys()) == 0
assert len(allocator._cached_blocks.values()) == 0
assert len(allocator.evictor.free_table.keys()) == 0
assert allocator._refcounter._refcounts == zero_ref

# Allocate immutable chains with only one block residuled in
new_block = []
for i in range(num_blocks):
block = allocator.allocate_immutable(
prev_block=None,
token_ids=token_ids[block_size * i:block_size * (i + 1)])
new_block.append(block)

# Free all blocks, and now all blocks shall be in the evictor
# there shall be no tracking data left in _blocks
# all blocks shall be tracked in _cached_blocks
# all blocks' ref shall be zero
for block in new_block:
allocator.free(block)

assert len(allocator._blocks.keys()) == 0
assert len(allocator._hashless_allocator._free_block_indices) == 0
assert list(allocator._cached_blocks.values()) == all_blocks_list
assert list(allocator.evictor.free_table.keys()) == all_blocks_list
assert allocator._refcounter._refcounts == zero_ref

# Allocate a mutable block, and the first block shall be evicted
# and set its content hash into None, ref to 1
mutable = allocator.allocate_mutable(prev_block=None)

assert mutable.block_id == 0
assert mutable.content_hash is None
assert 0 in allocator._blocks
assert allocator._refcounter.get(0) == 1
assert 0 not in allocator._cached_blocks
assert 0 not in allocator.evictor

# Since this mutable block has no hash yet, it shall be released into
# hashless allocator
allocator.free(mutable)

assert len(allocator._blocks.keys()) == 0
assert allocator._refcounter._refcounts == zero_ref
assert 0 not in allocator._cached_blocks
assert 0 not in allocator.evictor
assert 0 in allocator._hashless_allocator._free_block_indices

# when allocate immutable with first block_size tokens, we
# shall get free block from hashless allocator, thus no block left
# in hashless
block = allocator.allocate_immutable(prev_block=None,
token_ids=token_ids[:block_size])

assert block.block_id == 0
assert len(allocator._hashless_allocator._free_block_indices) == 0
assert 0 in allocator._blocks
assert 0 in allocator._cached_blocks.values()
assert allocator._refcounter.get(0) == 1
assert 0 not in allocator.evictor

# allocate mutable block again, it shall be popped from evictor
mutable = allocator.allocate_mutable(prev_block=None)
assert len(allocator._hashless_allocator._free_block_indices) == 0
assert mutable.block_id not in allocator.evictor.free_table
assert allocator._refcounter.get(mutable.block_id) == 1

# Test case where two last accessed times are equal
@staticmethod
@pytest.mark.parametrize("num_blocks", [1024])
Expand Down
41 changes: 24 additions & 17 deletions vllm/core/block/prefix_caching_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,21 +160,17 @@ def allocate_mutable(self,
# If the evictor has blocks available for eviction, evict a block
# and return it.
if self.evictor.num_blocks > 0:
# here we get an evicted block, which is only added
# into evictor if its ref counter is 0
# and since its content would be changed, we need
# to remove it from _cached_blocks's tracking list
block_id, content_hash_to_evict = self.evictor.evict()

# Here we may have scenario that several blocks have
# the same content hash, but due to the latter coming block
# is coming from mutable to immutable path, their physical
# block is added into evictor.
# However in this case, we shall not pop the _cached_blocks,
# as the same content is still used by others, which means
# we need to check ref before decide to pop the list.

_block_id = self._cached_blocks[content_hash_to_evict]
refcount = self._refcounter.get(_block_id)
if refcount == 1:
self._cached_blocks.pop(content_hash_to_evict)
assert _block_id == block_id
assert self._refcounter.get(_block_id) == 0
assert _block_id == block_id

self._cached_blocks.pop(content_hash_to_evict)

self._refcounter.incr(block_id)

Expand All @@ -199,7 +195,11 @@ def allocate_mutable(self,

def _incr_refcount_cached_block(self, block: Block,
block_id: BlockId) -> None:
# since block is already computed, mark it
# now _incr_refcount_cached_block comes from two place
# allocate_immutable/promote_to_immutable_block where hit
# _cached_blocks hash key.
# In both cases, it means that already exists a already
# computed block which shared with block now
block.computed = True

refcount = self._refcounter.incr(block_id)
Expand Down Expand Up @@ -228,13 +228,19 @@ def _free_block_id_for_block(self, block_id: BlockId,
block: Block) -> None:
assert isinstance(block, PrefixCachingBlock)

if block.content_hash is None:
# if we comes from promote_to_immutable_block, it means that
# block.content_hash is never None.
# However we need to release the same content block, so that
# physical block could get reused.
if block.block_id != block_id or block.content_hash is None:
refcount = self._refcounter.get(block_id)
# We have fork case where block would get more than one ref,
# so we cannot free it from tracking if ref cnt large than 1
if refcount <= 1:
assert block.block_id is not None
assert block.block_id is not None
refcount = self._refcounter.get(block.block_id)
if refcount == 1:
del self._blocks[block.block_id]

return self._hashless_allocator.free(block)

refcount = self._refcounter.decr(block_id)
Expand Down Expand Up @@ -317,7 +323,8 @@ def promote_to_immutable_block(self, block: Block) -> BlockId:
if block.content_hash not in self._cached_blocks:
self._cached_blocks[block.content_hash] = block.block_id
else:
self._free_block_id_for_block(block.block_id, block)
self._free_block_id_for_block(
self._cached_blocks[block.content_hash], block)
self._incr_refcount_cached_block(
block, self._cached_blocks[block.content_hash])

Expand Down

0 comments on commit e64fde4

Please sign in to comment.