Skip to content

Commit

Permalink
Removed padding block from the list of available blocks in allocators (#312)
Browse files Browse the repository at this point in the history

On HPU, block 0 is used only for padding. This PR removes the padding block
from the list of available blocks in block allocators v1 and v2.
  • Loading branch information
tzielinski-habana authored Sep 20, 2024
1 parent 401acbd commit 84b2490
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 4 deletions.
8 changes: 6 additions & 2 deletions vllm/core/block/cpu_gpu_block_allocator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
DeviceAwareBlockAllocator)
from vllm.core.block.naive_block import NaiveBlock, NaiveBlockAllocator
from vllm.core.block.prefix_caching_block import PrefixCachingBlockAllocator
from vllm.utils import Device
from vllm.utils import Device, is_hpu


class CpuGpuBlockAllocator(DeviceAwareBlockAllocator):
Expand Down Expand Up @@ -52,7 +52,11 @@ def create(
- The block IDs are assigned contiguously, with GPU block IDs coming
before CPU block IDs.
"""
block_ids = list(range(num_gpu_blocks + num_cpu_blocks))
# For HPU, block id 0 is used only for padding
reserved_blocks = 1 if is_hpu() else 0
block_ids = list(
range(reserved_blocks, num_gpu_blocks + num_cpu_blocks))
num_gpu_blocks -= reserved_blocks
gpu_block_ids = block_ids[:num_gpu_blocks]
cpu_block_ids = block_ids[num_gpu_blocks:]

Expand Down
6 changes: 4 additions & 2 deletions vllm/core/block_manager_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from vllm.core.interfaces import AllocStatus, BlockSpaceManager
from vllm.logger import init_logger
from vllm.sequence import Sequence, SequenceGroup, SequenceStatus
from vllm.utils import Device
from vllm.utils import Device, is_hpu

logger = init_logger(__name__)

Expand Down Expand Up @@ -171,7 +171,9 @@ def __init__(

# Initialize the free blocks.
self.free_blocks: BlockTable = []
for i in range(num_blocks):
# For HPU, block id 0 is used only for padding
reserved_blocks = 1 if is_hpu() else 0
for i in range(reserved_blocks, num_blocks):
block = PhysicalTokenBlock(device=device,
block_number=i,
block_size=block_size,
Expand Down

0 comments on commit 84b2490

Please sign in to comment.