From 71f8d20d4e386047dcb9a9a1c0da9d2f77c9a2d1 Mon Sep 17 00:00:00 2001 From: Chen Zhang Date: Mon, 6 Oct 2025 07:21:49 -0700 Subject: [PATCH 1/4] fix bug Signed-off-by: Chen Zhang --- vllm/v1/engine/core.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py index 4826d7c589a7..a3e527292f5b 100644 --- a/vllm/v1/engine/core.py +++ b/vllm/v1/engine/core.py @@ -177,14 +177,17 @@ def __init__( self.vllm_config.cache_config.enable_prefix_caching or self.scheduler.get_kv_connector() is not None ): - block_size = vllm_config.cache_config.block_size + hash_block_size = ( + vllm_config.cache_config.block_size + * vllm_config.parallel_config.decode_context_parallel_size + ) caching_hash_fn = get_hash_fn_by_name( vllm_config.cache_config.prefix_caching_hash_algo ) init_none_hash(caching_hash_fn) self.request_block_hasher = get_request_block_hasher( - block_size, caching_hash_fn + hash_block_size, caching_hash_fn ) self.step_fn = ( From 59a6129438e1aabd5e5279679f1bb3f724932d58 Mon Sep 17 00:00:00 2001 From: Chen Zhang Date: Mon, 6 Oct 2025 07:32:38 -0700 Subject: [PATCH 2/4] clean up Signed-off-by: Chen Zhang --- vllm/v1/core/sched/scheduler.py | 10 ++-------- vllm/v1/engine/core.py | 12 +++++++----- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py index d9a0ff1aa5c9..0df919589c08 100644 --- a/vllm/v1/core/sched/scheduler.py +++ b/vllm/v1/core/sched/scheduler.py @@ -45,6 +45,7 @@ def __init__( vllm_config: VllmConfig, kv_cache_config: KVCacheConfig, structured_output_manager: StructuredOutputManager, + block_size: int, mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY, include_finished_set: bool = False, log_stats: bool = False, @@ -101,15 +102,8 @@ def __init__( num_gpu_blocks = self.cache_config.num_gpu_blocks assert num_gpu_blocks is not None and num_gpu_blocks > 0 - self.block_size = self.cache_config.block_size - + self.block_size = block_size self.dcp_world_size = vllm_config.parallel_config.decode_context_parallel_size - # Note(hc): The scheduler’s block_size must be multiplied - # by dcp_world_size, since block hashes are computed on the - # original full token sequence at a granularity of - # original_block_size × dcp_world_size. - if self.dcp_world_size > 1: - self.block_size *= self.dcp_world_size # req_id -> Request self.requests: dict[str, Request] = {} diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py index a3e527292f5b..f0b621ae8e3e 100644 --- a/vllm/v1/engine/core.py +++ b/vllm/v1/engine/core.py @@ -142,12 +142,18 @@ def __init__( logger.info("Disabling chunked prefill for model without KVCache") vllm_config.scheduler_config.chunked_prefill_enabled = False + scheduler_block_size = ( + vllm_config.cache_config.block_size + * vllm_config.parallel_config.decode_context_parallel_size + ) + self.scheduler: SchedulerInterface = Scheduler( vllm_config=vllm_config, kv_cache_config=kv_cache_config, structured_output_manager=self.structured_output_manager, include_finished_set=vllm_config.parallel_config.data_parallel_size > 1, log_stats=self.log_stats, + block_size=scheduler_block_size, ) self.use_spec_decode = vllm_config.speculative_config is not None if self.scheduler.connector is not None: # type: ignore @@ -177,17 +183,13 @@ def __init__( self.vllm_config.cache_config.enable_prefix_caching or self.scheduler.get_kv_connector() is not None ): - hash_block_size = ( - vllm_config.cache_config.block_size - * vllm_config.parallel_config.decode_context_parallel_size - ) caching_hash_fn = get_hash_fn_by_name( vllm_config.cache_config.prefix_caching_hash_algo ) init_none_hash(caching_hash_fn) self.request_block_hasher = get_request_block_hasher( - hash_block_size, caching_hash_fn + scheduler_block_size, caching_hash_fn ) self.step_fn = ( From ead37b09cb89312306cf26bfa49f9d7e1313eb13 Mon Sep 17 00:00:00 2001 From: Chen Zhang Date: Thu, 9 Oct 2025 00:30:47 -0700 Subject: [PATCH 3/4] add block_size to tests Signed-off-by: Chen Zhang --- tests/v1/core/test_scheduler.py | 1 + tests/v1/kv_connector/unit/utils.py | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/v1/core/test_scheduler.py b/tests/v1/core/test_scheduler.py index dfa965c56766..a384e4aadcf4 100644 --- a/tests/v1/core/test_scheduler.py +++ b/tests/v1/core/test_scheduler.py @@ -1411,6 +1411,7 @@ def create_scheduler_with_priority( kv_cache_config=kv_cache_config, log_stats=True, structured_output_manager=StructuredOutputManager(vllm_config), + block_size=block_size, ) diff --git a/tests/v1/kv_connector/unit/utils.py b/tests/v1/kv_connector/unit/utils.py index 24c0bd51216d..6f51b9bbcbda 100644 --- a/tests/v1/kv_connector/unit/utils.py +++ b/tests/v1/kv_connector/unit/utils.py @@ -138,6 +138,7 @@ def create_scheduler( kv_cache_config=kv_cache_config, log_stats=True, structured_output_manager=StructuredOutputManager(vllm_config), + block_size=block_size, ) From d23a6f5315710e2e7df0e60b57901d5dcaa5a65f Mon Sep 17 00:00:00 2001 From: Chen Zhang Date: Thu, 9 Oct 2025 06:39:46 -0700 Subject: [PATCH 4/4] fix block_size Signed-off-by: Chen Zhang --- tests/v1/core/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/v1/core/utils.py b/tests/v1/core/utils.py index 75ef1a5ec165..c11cf3e817d1 100644 --- a/tests/v1/core/utils.py +++ b/tests/v1/core/utils.py @@ -129,6 +129,7 @@ def create_scheduler( return scheduler_cls( vllm_config=vllm_config, kv_cache_config=kv_cache_config, + block_size=block_size, log_stats=True, structured_output_manager=StructuredOutputManager(vllm_config), )