From d515360ccfde70121e16b5607b1d02a2d6c5c5fb Mon Sep 17 00:00:00 2001
From: Sourashis Roy <sroy@roblox.com>
Date: Sun, 6 Oct 2024 04:13:17 +0000
Subject: [PATCH 1/6] Fix BlockManager V2 when the encoder input is None

---
 vllm/core/block/block_table.py | 2 --
 vllm/core/block_manager_v2.py  | 3 ++-
 vllm/engine/arg_utils.py       | 5 -----
 3 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/vllm/core/block/block_table.py b/vllm/core/block/block_table.py
index a9f4bd871dfd..d10cb29ef4a7 100644
--- a/vllm/core/block/block_table.py
+++ b/vllm/core/block/block_table.py
@@ -220,7 +220,6 @@ def free(self) -> None:
         occupied by each block. After freeing all the blocks, the `_blocks` list
         is set to `None`.
         """
-        assert self._is_allocated
         for block in self.blocks:
             self._allocator.free(block)
         self._blocks.reset()
@@ -239,7 +238,6 @@ def physical_block_ids(self) -> List[int]:
             List[int]: A list of physical block indices for the blocks in the
                 BlockTable.
         """
-        assert self._is_allocated
         return self._blocks.ids()
 
     def get_unseen_token_ids(self, sequence_token_ids: List[int]) -> List[int]:
diff --git a/vllm/core/block_manager_v2.py b/vllm/core/block_manager_v2.py
index 0fad5fa99daf..2dd5479f54a7 100644
--- a/vllm/core/block_manager_v2.py
+++ b/vllm/core/block_manager_v2.py
@@ -151,7 +151,8 @@ def _allocate_sequence(self, seq: Sequence) -> BlockTable:
             block_allocator=self.block_allocator,
             max_block_sliding_window=self.max_block_sliding_window,
         )
-        block_table.allocate(seq.get_token_ids())
+        if (len(seq.get_token_ids()) > 0):
+            block_table.allocate(seq.get_token_ids())
 
         return block_table
 
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 1623ebb3aa74..cae95d20ca23 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -903,11 +903,6 @@ def create_engine_config(self) -> EngineConfig:
                     "--enable-prefix-caching is currently not "
                     "supported for multimodal models and has been disabled.")
             self.enable_prefix_caching = False
-        if model_config.is_encoder_decoder_model:
-            logger.warning(
-                "Block Manager v2 does not support encoder-decoder models"
-                " currently. Using Block Manager v1 as fallback.")
-            self.use_v2_block_manager = False
 
         cache_config = CacheConfig(
             block_size=self.block_size if self.device != "neuron" else

From effc48d6b4b2dfb82fcbdf7f3da5ab43b700c21a Mon Sep 17 00:00:00 2001
From: Sourashis Roy <sroy@roblox.com>
Date: Sun, 6 Oct 2024 18:57:54 +0000
Subject: [PATCH 2/6] Add comment explaining the non-empty sequence check

---
 vllm/core/block_manager_v2.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/vllm/core/block_manager_v2.py b/vllm/core/block_manager_v2.py
index 2dd5479f54a7..752faa845ada 100644
--- a/vllm/core/block_manager_v2.py
+++ b/vllm/core/block_manager_v2.py
@@ -152,6 +152,8 @@ def _allocate_sequence(self, seq: Sequence) -> BlockTable:
             max_block_sliding_window=self.max_block_sliding_window,
         )
         if (len(seq.get_token_ids()) > 0):
+            # Add blocks to the block table only if the encoder sequence
+            # is non empty.
             block_table.allocate(seq.get_token_ids())
 
         return block_table

From 3296228e48b29647f77712d1fa7020bc7214dd30 Mon Sep 17 00:00:00 2001
From: Sourashis Roy <sroy@roblox.com>
Date: Sun, 6 Oct 2024 19:00:28 +0000
Subject: [PATCH 3/6] Reword allocation comment to cover any sequence

---
 vllm/core/block_manager_v2.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/vllm/core/block_manager_v2.py b/vllm/core/block_manager_v2.py
index 752faa845ada..81724a023f84 100644
--- a/vllm/core/block_manager_v2.py
+++ b/vllm/core/block_manager_v2.py
@@ -152,8 +152,7 @@ def _allocate_sequence(self, seq: Sequence) -> BlockTable:
             max_block_sliding_window=self.max_block_sliding_window,
         )
         if (len(seq.get_token_ids()) > 0):
-            # Add blocks to the block table only if the encoder sequence
-            # is non empty.
+            # Add blocks to the block table only if the sequence is non empty.
             block_table.allocate(seq.get_token_ids())
 
         return block_table

From 2a484cb24dded46e713cf48476067972bb81bb58 Mon Sep 17 00:00:00 2001
From: sroy745 <142070531+sroy745@users.noreply.github.com>
Date: Sun, 6 Oct 2024 12:42:38 -0700
Subject: [PATCH 4/6] Use truthiness check for empty token ids in block_manager_v2.py

Co-authored-by: Cody Yu <hao.yu.cody@gmail.com>
---
 vllm/core/block_manager_v2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/core/block_manager_v2.py b/vllm/core/block_manager_v2.py
index 81724a023f84..c7ee6609306d 100644
--- a/vllm/core/block_manager_v2.py
+++ b/vllm/core/block_manager_v2.py
@@ -151,7 +151,7 @@ def _allocate_sequence(self, seq: Sequence) -> BlockTable:
             block_allocator=self.block_allocator,
             max_block_sliding_window=self.max_block_sliding_window,
         )
-        if (len(seq.get_token_ids()) > 0):
+        if seq.get_token_ids():
             # Add blocks to the block table only if the sequence is non empty.
             block_table.allocate(seq.get_token_ids())
 

From 56b446dd751778c2b2169e1527d24f523c7bac3c Mon Sep 17 00:00:00 2001
From: Sourashis Roy <sroy@roblox.com>
Date: Sun, 6 Oct 2024 23:55:56 +0000
Subject: [PATCH 5/6] Dummy: add a whitespace-only line (no functional change)

---
 vllm/core/block_manager_v2.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vllm/core/block_manager_v2.py b/vllm/core/block_manager_v2.py
index c7ee6609306d..1cd507126e3a 100644
--- a/vllm/core/block_manager_v2.py
+++ b/vllm/core/block_manager_v2.py
@@ -151,6 +151,7 @@ def _allocate_sequence(self, seq: Sequence) -> BlockTable:
             block_allocator=self.block_allocator,
             max_block_sliding_window=self.max_block_sliding_window,
         )
+        
         if seq.get_token_ids():
             # Add blocks to the block table only if the sequence is non empty.
             block_table.allocate(seq.get_token_ids())

From 27047a8d4087ed2245c7018e924b9f8496426991 Mon Sep 17 00:00:00 2001
From: Sourashis Roy <sroy@roblox.com>
Date: Sun, 6 Oct 2024 23:56:19 +0000
Subject: [PATCH 6/6] Dummy: remove the whitespace-only line (no functional change)

---
 vllm/core/block_manager_v2.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/vllm/core/block_manager_v2.py b/vllm/core/block_manager_v2.py
index 1cd507126e3a..c7ee6609306d 100644
--- a/vllm/core/block_manager_v2.py
+++ b/vllm/core/block_manager_v2.py
@@ -151,7 +151,6 @@ def _allocate_sequence(self, seq: Sequence) -> BlockTable:
             block_allocator=self.block_allocator,
             max_block_sliding_window=self.max_block_sliding_window,
         )
-        
         if seq.get_token_ids():
             # Add blocks to the block table only if the sequence is non empty.
             block_table.allocate(seq.get_token_ids())