1 file changed: +5 −2 lines
@@ -23,7 +23,8 @@
 from vllm.sampling_params import SamplingType
 from vllm.sequence import IntermediateTensors
 from vllm.utils import LayerBlockType, cdiv, is_pin_memory_available
-from vllm.v1.attention.backends.pallas import (PallasAttentionBackend,
+from vllm.v1.attention.backends.pallas import (NUM_KV_PAGES_PER_BLOCK,
+                                               PallasAttentionBackend,
                                                PallasMetadata)
 from vllm.v1.core.encoder_cache_manager import compute_encoder_budget
 from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig,
@@ -138,8 +139,10 @@ def __init__(
             device="cpu")
         self.slot_mapping_np = self.slot_mapping_cpu.numpy()

+        padded_max_num_blocks_per_req = _get_padded_number(
+            self.max_num_blocks_per_req, NUM_KV_PAGES_PER_BLOCK)
         self.block_table_cpu = torch.zeros(
-            (self.max_num_tokens, self.max_num_blocks_per_req),
+            (self.max_num_tokens, padded_max_num_blocks_per_req),
             dtype=self.input_batch.block_table.get_cpu_tensor().dtype,
             device="cpu")
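
The change rounds the block table's second dimension up to a multiple of NUM_KV_PAGES_PER_BLOCK before allocating block_table_cpu, presumably so the table's width lines up with the page granularity the Pallas attention backend works in. Below is a minimal sketch of that padding step. It assumes _get_padded_number is a simple round-up-to-multiple helper (its definition is not shown in this diff), and the constant and sizes used here are hypothetical values chosen only for illustration.

    import torch

    def _get_padded_number(x: int, multiple: int) -> int:
        # Assumed behavior of the helper used in the diff:
        # round x up to the nearest multiple of `multiple`.
        return ((x + multiple - 1) // multiple) * multiple

    NUM_KV_PAGES_PER_BLOCK = 128   # hypothetical value, for illustration only
    max_num_tokens = 8192          # hypothetical
    max_num_blocks_per_req = 100   # hypothetical: not a multiple of 128

    padded_max_num_blocks_per_req = _get_padded_number(
        max_num_blocks_per_req, NUM_KV_PAGES_PER_BLOCK)

    # The CPU block table is allocated with the padded width (128 columns
    # instead of 100), mirroring the block_table_cpu change above.
    # dtype is chosen arbitrarily here; the real code takes it from the
    # input batch's block table.
    block_table_cpu = torch.zeros(
        (max_num_tokens, padded_max_num_blocks_per_req),
        dtype=torch.int32,
        device="cpu")

    print(block_table_cpu.shape)   # torch.Size([8192, 128])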