From 2c388c2476bd9dd6091851f8eb65bbb00684c3ad Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Wed, 27 Aug 2025 20:30:12 +0900 Subject: [PATCH 01/27] docs: docstring in naive_block & prefix_caching_block WARNING - griffe: vllm/core/block/naive_block.py:210: Failed to get 'name: description' pair from 'in whole allocator.' WARNING - griffe: vllm/core/block/prefix_caching_block.py:64: Parameter 'block_ids(Optional[Iterable[int]],' does not appear in the function signature Signed-off-by: Zerohertz --- vllm/core/block/naive_block.py | 2 +- vllm/core/block/prefix_caching_block.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/core/block/naive_block.py b/vllm/core/block/naive_block.py index dae6ead04e9c..7d9b32cd4b67 100644 --- a/vllm/core/block/naive_block.py +++ b/vllm/core/block/naive_block.py @@ -207,7 +207,7 @@ def get_physical_block_id(self, absolute_id: int) -> int: Args: absolute_id (int): The absolute block id for the block - in whole allocator. + in whole allocator. Returns: int: The zero-offset block id on certain device. diff --git a/vllm/core/block/prefix_caching_block.py b/vllm/core/block/prefix_caching_block.py index 2913a01bf34a..a21d69323abb 100644 --- a/vllm/core/block/prefix_caching_block.py +++ b/vllm/core/block/prefix_caching_block.py @@ -61,7 +61,7 @@ class PrefixCachingBlockAllocator(BlockAllocator): Args: num_blocks (int): The total number of blocks to manage. block_size (int): The size of each block in tokens. - block_ids(Optional[Iterable[int]], optional): An optional iterable of + block_ids (Optional[Iterable[int]], optional): An optional iterable of block IDs. If not provided, block IDs will be assigned sequentially from 0 to num_blocks - 1. """ From ff3d8089e71843127999bb355e303dbab18883a4 Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Wed, 27 Aug 2025 20:45:00 +0900 Subject: [PATCH 02/27] docs: docstring in scheduler WARNING - griffe: vllm/core/scheduler.py:660: Failed to get 'name: description' pair from 'that are currently running' Signed-off-by: Zerohertz --- vllm/core/scheduler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py index 63894e7f5dc8..c89f3f663264 100644 --- a/vllm/core/scheduler.py +++ b/vllm/core/scheduler.py @@ -657,7 +657,7 @@ def _schedule_running( `budget.num_batched_tokens` has not enough capacity to schedule all tokens. partial_prefill_metadata: information about the partial prefills - that are currently running + that are currently running Returns: SchedulerRunningOutputs. 
From 489ca3eab4a076122e144a41430fe7becdb61120 Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Wed, 27 Aug 2025 20:53:16 +0900 Subject: [PATCH 03/27] docs: docstring in flash_attn & flashinfer WARNING - griffe: vllm/v1/attention/backends/flash_attn.py:441: Failed to get 'name: description' pair from 'kv_cache = [2, num_blocks, block_size, num_kv_heads, head_size]' WARNING - griffe: vllm/v1/attention/backends/flashinfer.py:641: Parameter '#' does not appear in the function signature WARNING - griffe: vllm/v1/attention/backends/flashinfer.py:642: Parameter '#' does not appear in the function signature Signed-off-by: Zerohertz --- vllm/v1/attention/backends/flash_attn.py | 3 ++- vllm/v1/attention/backends/flashinfer.py | 8 +++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/vllm/v1/attention/backends/flash_attn.py b/vllm/v1/attention/backends/flash_attn.py index 6e7096de924c..a9f3d8ee2a63 100755 --- a/vllm/v1/attention/backends/flash_attn.py +++ b/vllm/v1/attention/backends/flash_attn.py @@ -438,7 +438,8 @@ def forward( query: shape = [num_tokens, num_heads, head_size] key: shape = [num_tokens, num_kv_heads, head_size] value: shape = [num_tokens, num_kv_heads, head_size] - kv_cache = [2, num_blocks, block_size, num_kv_heads, head_size] + kv_cache: KV cache tensor with shape + [2, num_blocks, block_size, num_kv_heads, head_size] attn_metadata: Metadata for attention. Returns: shape = [num_tokens, num_heads * head_size] diff --git a/vllm/v1/attention/backends/flashinfer.py b/vllm/v1/attention/backends/flashinfer.py index 1115fc606b05..70d3471a4725 100755 --- a/vllm/v1/attention/backends/flashinfer.py +++ b/vllm/v1/attention/backends/flashinfer.py @@ -637,11 +637,9 @@ def forward( query: shape = [num_tokens, num_heads, head_size] key: shape = [num_tokens, num_kv_heads, head_size] value: shape = [num_tokens, num_kv_heads, head_size] - kv_cache: shape - - # NHD: [num_blocks, 2, block_size, num_kv_heads, head_size] - # HND: [num_blocks, 2, num_kv_heads, block_size, head_size] - - + kv_cache: KV cache tensor with different possible shapes: + - NHD: [num_blocks, 2, block_size, num_kv_heads, head_size] + - HND: [num_blocks, 2, num_kv_heads, block_size, head_size] attn_metadata: Metadata for attention. Returns: shape = [num_tokens, num_heads * head_size] From 6c43f106c158c748ab2f911327d68285ae6ad663 Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Wed, 27 Aug 2025 20:58:51 +0900 Subject: [PATCH 04/27] docs: docstring in v1 worker utils WARNING - griffe: vllm/v1/worker/utils.py:280: Failed to get 'name: description' pair from 'layers with layer names as keys.' WARNING - griffe: vllm/v1/worker/utils.py:174: Confusing indentation for continuation line 8 in docstring, should be 4 * 2 = 8 spaces, not 6 WARNING - griffe: vllm/v1/worker/utils.py:176: Confusing indentation for continuation line 10 in docstring, should be 4 * 2 = 8 spaces, not 6 WARNING - griffe: vllm/v1/worker/utils.py:177: Confusing indentation for continuation line 11 in docstring, should be 4 * 2 = 8 spaces, not 6 Signed-off-by: Zerohertz --- vllm/v1/worker/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vllm/v1/worker/utils.py b/vllm/v1/worker/utils.py index f40753468766..a519336e4161 100644 --- a/vllm/v1/worker/utils.py +++ b/vllm/v1/worker/utils.py @@ -172,10 +172,10 @@ def scatter_mm_placeholders( Args: embeds: The multimodal embeddings. 
- Shape: `(num_embeds, embed_dim)` + Shape: `(num_embeds, embed_dim)` is_embed: A boolean mask indicating which positions in the placeholder - tokens need to be filled with multimodal embeddings. - Shape: `(num_placeholders, num_embeds)` + tokens need to be filled with multimodal embeddings. + Shape: `(num_placeholders, num_embeds)` """ if is_embed is None: return embeds @@ -278,7 +278,7 @@ def bind_kv_cache( Args: kv_caches: The allocated kv_caches with layer names as keys. forward_context: The global forward context containing all Attention - layers with layer names as keys. + layers with layer names as keys. runner_kv_caches: The kv_cache declared by ModelRunner. """ # Bind kv_caches to ModelRunner From fd0977084553ea8212b237368ba39f2e98dc95f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hyogeun=20Oh=20=28=EC=98=A4=ED=9A=A8=EA=B7=BC=29?= Date: Wed, 27 Aug 2025 21:39:01 +0900 Subject: [PATCH 05/27] docs: docstring format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Hyogeun Oh (오효근) --- vllm/v1/attention/backends/flash_attn.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vllm/v1/attention/backends/flash_attn.py b/vllm/v1/attention/backends/flash_attn.py index a9f3d8ee2a63..04c4b6c35a86 100755 --- a/vllm/v1/attention/backends/flash_attn.py +++ b/vllm/v1/attention/backends/flash_attn.py @@ -438,8 +438,7 @@ def forward( query: shape = [num_tokens, num_heads, head_size] key: shape = [num_tokens, num_kv_heads, head_size] value: shape = [num_tokens, num_kv_heads, head_size] - kv_cache: KV cache tensor with shape - [2, num_blocks, block_size, num_kv_heads, head_size] + kv_cache: shape = [2, num_blocks, block_size, num_kv_heads, head_size] attn_metadata: Metadata for attention. Returns: shape = [num_tokens, num_heads * head_size] From 1b73b1faded86322a7e4e21ef93c03d74eb40e59 Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Wed, 27 Aug 2025 21:12:22 +0900 Subject: [PATCH 06/27] docs: docstring and type hint in kv_cache_manager WARNING - griffe: vllm/v1/core/kv_cache_manager.py:61: No type or annotation for returned value 1 WARNING - griffe: vllm/v1/core/kv_cache_manager.py:62: No type or annotation for returned value 2 WARNING - griffe: vllm/v1/core/kv_cache_manager.py:63: No type or annotation for returned value 3 Signed-off-by: Zerohertz --- vllm/v1/core/kv_cache_manager.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/vllm/v1/core/kv_cache_manager.py b/vllm/v1/core/kv_cache_manager.py index b427a9c497fe..87a11fe58a04 100644 --- a/vllm/v1/core/kv_cache_manager.py +++ b/vllm/v1/core/kv_cache_manager.py @@ -54,14 +54,15 @@ def get_block_ids( def get_block_ids( self, allow_none: bool = False, - ): + ) -> Optional[tuple[list[int], ...]]: """ Converts the KVCacheBlocks instance to block_ids. 
- + Returns: - tuple[list[int], ...]: A tuple of lists where - * the outer tuple corresponds to KV cache groups - * each inner list contains the block_ids of the blocks in that group + tuple[list[int], ...]: A tuple of lists where: + - the outer tuple corresponds to KV cache groups + - each inner list contains the block_ids of the blocks in that + group """ if allow_none and all(len(group) == 0 for group in self.blocks): return None From 067fa002aee31089de082f85654f7b3af110b048 Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Wed, 27 Aug 2025 21:21:54 +0900 Subject: [PATCH 07/27] docs: docstring and type hint in tpu_model_runner WARNING - griffe: vllm/v1/worker/tpu_model_runner.py:567: Failed to get 'name: description' pair from 'to be scheduled for each request.' WARNING - griffe: vllm/v1/worker/tpu_model_runner.py:570: No type or annotation for returned value 1 WARNING - griffe: vllm/v1/worker/tpu_model_runner.py:571: No type or annotation for returned value 'contains' Signed-off-by: Zerohertz --- vllm/v1/worker/tpu_model_runner.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vllm/v1/worker/tpu_model_runner.py b/vllm/v1/worker/tpu_model_runner.py index d36423660427..70ffde39ca33 100644 --- a/vllm/v1/worker/tpu_model_runner.py +++ b/vllm/v1/worker/tpu_model_runner.py @@ -552,7 +552,7 @@ def get_kv_cache_spec(self) -> dict[str, KVCacheSpec]: return kv_cache_spec def _get_slot_mapping_metadata(self, num_reqs, - num_scheduled_tokens_per_req): + num_scheduled_tokens_per_req) -> np.ndarray: """ Computes metadata for mapping slots to blocks in the key-value (KV) cache for a batch of requests. @@ -565,15 +565,15 @@ def _get_slot_mapping_metadata(self, num_reqs, Args: num_reqs (int): Number of requests in the current batch. num_scheduled_tokens_per_req (int or np.ndarray): Number of tokens - to be scheduled for each request. + to be scheduled for each request. Returns: np.ndarray: A 2D array of shape (total_block_len, 3), where each row - contains: + contains: - kv_cache_start_index (int): The starting index in the KV cache - for the corresponding slice. + for the corresponding slice. - new_kv_start_index (int): The starting index in the new KV - cache for the corresponding slice. + cache for the corresponding slice. - slice_len (int): The length of the slice. 
""" slices_start = self.input_batch.num_computed_tokens_cpu[:num_reqs] From 00cbb5d08bc1655ea1c08fa3a97c6715272c9595 Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Wed, 27 Aug 2025 21:27:52 +0900 Subject: [PATCH 08/27] docs: indent in encoder_cache_manager WARNING - griffe: vllm/v1/core/encoder_cache_manager.py:258: Confusing indentation for continuation line 6 in docstring, should be 4 * 2 = 8 spaces, not 6 WARNING - griffe: vllm/v1/core/encoder_cache_manager.py:260: Confusing indentation for continuation line 8 in docstring, should be 4 * 2 = 8 spaces, not 6 WARNING - griffe: vllm/v1/core/encoder_cache_manager.py:306: Confusing indentation for continuation line 11 in docstring, should be 4 * 2 = 8 spaces, not 6 WARNING - griffe: vllm/v1/core/encoder_cache_manager.py:308: Confusing indentation for continuation line 13 in docstring, should be 4 * 2 = 8 spaces, not 6 Signed-off-by: Zerohertz --- vllm/v1/core/encoder_cache_manager.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vllm/v1/core/encoder_cache_manager.py b/vllm/v1/core/encoder_cache_manager.py index c9d18033a198..bd2ec036834b 100644 --- a/vllm/v1/core/encoder_cache_manager.py +++ b/vllm/v1/core/encoder_cache_manager.py @@ -255,9 +255,9 @@ def compute_encoder_budget( Returns: - Compute budget for encoder execution, measured in number of tokens - from the input sequence. + from the input sequence. - Space budget for encoder cache size, measured in number of tokens - from the input sequence. + from the input sequence. """ if mm_registry.supports_multimodal_inputs(model_config): max_tokens_by_modality = mm_registry \ @@ -303,9 +303,9 @@ def compute_mm_encoder_budget( Returns: - Compute budget for encoder execution, measured in number of tokens - from the input sequence. + from the input sequence. - Space budget for encoder cache size, measured in number of tokens - from the input sequence. + from the input sequence. """ if not max_tokens_by_modality: From c3bbc276e07446dc2ab1174c5c7910431de17aad Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Wed, 27 Aug 2025 21:36:27 +0900 Subject: [PATCH 09/27] docs: docstring in cpu_attn WARNING - griffe: vllm/v1/attention/backends/cpu_attn.py:494: Failed to get 'name: description' pair from 'kv_cache = [2, num_blocks, block_size * num_kv_heads * head_size]' Signed-off-by: Zerohertz --- vllm/v1/attention/backends/cpu_attn.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/v1/attention/backends/cpu_attn.py b/vllm/v1/attention/backends/cpu_attn.py index 973979fdf7df..ced8234a7b43 100644 --- a/vllm/v1/attention/backends/cpu_attn.py +++ b/vllm/v1/attention/backends/cpu_attn.py @@ -491,7 +491,8 @@ def forward( query: shape = [num_tokens, num_heads * head_size] key: shape = [num_tokens, num_kv_heads * head_size] value: shape = [num_tokens, num_kv_heads * head_size] - kv_cache = [2, num_blocks, block_size * num_kv_heads * head_size] + kv_cache: shape = + [2, num_blocks, block_size * num_kv_heads * head_size] NOTE: kv_cache will be an empty tensor with shape [0] for profiling run. attn_metadata: Metadata for attention. 
From b76bc2deed80c866d54971d317fcbf1f25e54c0b Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Wed, 27 Aug 2025 21:37:36 +0900 Subject: [PATCH 10/27] docs: docstring in flex_attention WARNING - griffe: vllm/v1/attention/backends/flex_attention.py:692: Failed to get 'name: description' pair from 'kv_cache = [2, num_blocks, block_size, num_kv_heads, head_size]' Signed-off-by: Zerohertz --- vllm/v1/attention/backends/flex_attention.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/v1/attention/backends/flex_attention.py b/vllm/v1/attention/backends/flex_attention.py index 458562ebc8d2..a596f6b2b32a 100644 --- a/vllm/v1/attention/backends/flex_attention.py +++ b/vllm/v1/attention/backends/flex_attention.py @@ -689,7 +689,8 @@ def forward( query: shape = [num_tokens, num_heads, head_size] key: shape = [num_tokens, num_kv_heads, head_size] value: shape = [num_tokens, num_kv_heads, head_size] - kv_cache = [2, num_blocks, block_size, num_kv_heads, head_size] + kv_cache: shape = + [2, num_blocks, block_size, num_kv_heads, head_size] attn_metadata: Metadata for attention. Returns: shape = [num_tokens, num_heads * head_size] From d0f6245729ee6f640865d9de9d347270bc0b3c4d Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Wed, 27 Aug 2025 21:39:25 +0900 Subject: [PATCH 11/27] docs: docstring in pallas WARNING - griffe: vllm/v1/attention/backends/pallas.py:238: Failed to get 'name: description' pair from 'kv_cache = [num_blocks, block_size, num_kv_heads * 2, head_size]' WARNING - griffe: vllm/v1/attention/backends/pallas.py:332: Failed to get 'name: description' pair from 'kv_cache = [num_blocks, block_size, num_kv_heads * 2, head_size]' Signed-off-by: Zerohertz --- vllm/v1/attention/backends/pallas.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vllm/v1/attention/backends/pallas.py b/vllm/v1/attention/backends/pallas.py index fd97db0abb84..26f9abf13d0e 100644 --- a/vllm/v1/attention/backends/pallas.py +++ b/vllm/v1/attention/backends/pallas.py @@ -235,7 +235,8 @@ def forward( query: shape = [num_tokens, num_heads * head_size] key: shape = [num_tokens, num_kv_heads * head_size] value: shape = [num_tokens, num_kv_heads * head_size] - kv_cache = [num_blocks, block_size, num_kv_heads * 2, head_size] + kv_cache: shape = + [num_blocks, block_size, num_kv_heads * 2, head_size] attn_metadata: Metadata for attention. 
Returns: shape = [num_tokens, num_heads * head_size] @@ -329,7 +330,7 @@ def write_to_kv_cache( Args: key: shape = [num_tokens, num_kv_heads, head_size] value: shape = [num_tokens, num_kv_heads, head_size] - kv_cache = [num_blocks, block_size, num_kv_heads * 2, head_size] + kv_cache: shape = [num_blocks, block_size, num_kv_heads * 2, head_size] num_slices_per_kv_cache_update_block: int """ _, page_size, num_combined_kv_heads, head_size = kv_cache.shape From 1dad7592acb2142cb509941249cdddc6632ede4a Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Wed, 27 Aug 2025 21:49:15 +0900 Subject: [PATCH 12/27] docs: docstring in rocm_aiter_fa WARNING - griffe: vllm/v1/attention/backends/rocm_aiter_fa.py:432: Failed to get 'name: description' pair from 'kv_cache = [2, num_blocks, block_size, num_kv_heads, head_size]' Signed-off-by: Zerohertz --- vllm/v1/attention/backends/rocm_aiter_fa.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/v1/attention/backends/rocm_aiter_fa.py b/vllm/v1/attention/backends/rocm_aiter_fa.py index 403ad8e88a95..173a0a255e49 100644 --- a/vllm/v1/attention/backends/rocm_aiter_fa.py +++ b/vllm/v1/attention/backends/rocm_aiter_fa.py @@ -429,7 +429,8 @@ def forward( query: shape = [num_tokens, num_heads, head_size] key: shape = [num_tokens, num_kv_heads, head_size] value: shape = [num_tokens, num_kv_heads, head_size] - kv_cache = [2, num_blocks, block_size, num_kv_heads, head_size] + kv_cache: shape = + [2, num_blocks, block_size, num_kv_heads, head_size] attn_metadata: Metadata for attention. Returns: shape = [num_tokens, num_heads * head_size] From 5a3dd50ad676f7c2421d7c500d83f4165b044471 Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Wed, 27 Aug 2025 21:50:31 +0900 Subject: [PATCH 13/27] docs: docstring in tree_attn WARNING - griffe: vllm/v1/attention/backends/tree_attn.py:365: Failed to get 'name: description' pair from 'kv_cache = [2, num_blocks, block_size, num_kv_heads, head_size]' Signed-off-by: Zerohertz --- vllm/v1/attention/backends/tree_attn.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/v1/attention/backends/tree_attn.py b/vllm/v1/attention/backends/tree_attn.py index c93223a34083..b96d957a150b 100644 --- a/vllm/v1/attention/backends/tree_attn.py +++ b/vllm/v1/attention/backends/tree_attn.py @@ -362,7 +362,8 @@ def forward( query: shape = [num_tokens, num_heads, head_size] key: shape = [num_tokens, num_kv_heads, head_size] value: shape = [num_tokens, num_kv_heads, head_size] - kv_cache = [2, num_blocks, block_size, num_kv_heads, head_size] + kv_cache: shape = + [2, num_blocks, block_size, num_kv_heads, head_size] attn_metadata: Metadata for attention. 
Returns: shape = [num_tokens, num_heads * head_size] From 8c0b40c45da76e5c45a91b56698375fdc87ba7ce Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Wed, 27 Aug 2025 21:52:10 +0900 Subject: [PATCH 14/27] docs: docstring in triton_attn WARNING - griffe: vllm/v1/attention/backends/triton_attn.py:288: Failed to get 'name: description' pair from 'kv_cache = [2, num_blocks, block_size, num_kv_heads, head_size]' Signed-off-by: Zerohertz --- vllm/v1/attention/backends/triton_attn.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/v1/attention/backends/triton_attn.py b/vllm/v1/attention/backends/triton_attn.py index b12036c59979..a37a7f6811ef 100644 --- a/vllm/v1/attention/backends/triton_attn.py +++ b/vllm/v1/attention/backends/triton_attn.py @@ -285,7 +285,8 @@ def forward( query: shape = [num_tokens, num_heads, head_size] key: shape = [num_tokens, num_kv_heads, head_size] value: shape = [num_tokens, num_kv_heads, head_size] - kv_cache = [2, num_blocks, block_size, num_kv_heads, head_size] + kv_cache: shape = + [2, num_blocks, block_size, num_kv_heads, head_size] attn_metadata: Metadata for attention. Returns: shape = [num_tokens, num_heads * head_size] From a0a56bd839047059f0bc0823d5851743abd9d0ca Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Wed, 27 Aug 2025 21:53:02 +0900 Subject: [PATCH 15/27] docs: docstring in xformers WARNING - griffe: vllm/v1/attention/backends/xformers.py:333: Failed to get 'name: description' pair from 'kv_cache = [2, num_blocks, block_size, num_kv_heads, head_size]' Signed-off-by: Zerohertz --- vllm/v1/attention/backends/xformers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/v1/attention/backends/xformers.py b/vllm/v1/attention/backends/xformers.py index e0eb7d8be974..7f888c113574 100644 --- a/vllm/v1/attention/backends/xformers.py +++ b/vllm/v1/attention/backends/xformers.py @@ -330,7 +330,8 @@ def forward( query: shape = [num_tokens, num_heads, head_size] key: shape = [num_tokens, num_kv_heads, head_size] value: shape = [num_tokens, num_kv_heads, head_size] - kv_cache = [2, num_blocks, block_size, num_kv_heads, head_size] + kv_cache: shape = + [2, num_blocks, block_size, num_kv_heads, head_size] attn_metadata: Metadata for attention. Returns: shape = [num_tokens, num_heads * head_size] From 70639c72cf87eef65acfeceb005b3f0edbce86be Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Wed, 27 Aug 2025 21:54:10 +0900 Subject: [PATCH 16/27] docs: docstring in kv_cache_coordinator WARNING - griffe: vllm/v1/core/kv_cache_coordinator.py:121: No type or annotation for parameter 'num_tokens' WARNING - griffe: vllm/v1/core/kv_cache_coordinator.py:121: Parameter 'num_tokens' does not appear in the function signature Signed-off-by: Zerohertz --- vllm/v1/core/kv_cache_coordinator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/v1/core/kv_cache_coordinator.py b/vllm/v1/core/kv_cache_coordinator.py index f082ad00f2e3..9421341f990c 100644 --- a/vllm/v1/core/kv_cache_coordinator.py +++ b/vllm/v1/core/kv_cache_coordinator.py @@ -119,7 +119,8 @@ def cache_blocks(self, request: Request, num_computed_tokens: int) -> None: Args: request: The request. - num_tokens: The total number of tokens that need to be cached + num_computed_tokens: The total number of tokens + that need to be cached (including tokens that are already cached). 
""" for manager in self.single_type_managers: From d89966fef22515e467fb488730d1d411817fecff Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Wed, 27 Aug 2025 22:00:16 +0900 Subject: [PATCH 17/27] docs: type hint of ray_distributed_executor WARNING - griffe: vllm/v1/executor/ray_distributed_executor.py:72: No type or annotation for parameter 'scheduler_output' Signed-off-by: Zerohertz --- vllm/v1/executor/ray_distributed_executor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/v1/executor/ray_distributed_executor.py b/vllm/v1/executor/ray_distributed_executor.py index c05ad1966d61..8394ae788ab0 100644 --- a/vllm/v1/executor/ray_distributed_executor.py +++ b/vllm/v1/executor/ray_distributed_executor.py @@ -8,6 +8,7 @@ from vllm.executor.ray_distributed_executor import ( # noqa RayDistributedExecutor as RayDistributedExecutorV0) from vllm.logger import init_logger +from vllm.v1.core.sched.output import SchedulerOutput from vllm.v1.engine import ReconfigureDistributedRequest, ReconfigureRankType from vllm.v1.executor.abstract import Executor from vllm.v1.outputs import ModelRunnerOutput @@ -64,7 +65,7 @@ def max_concurrent_batches(self) -> int: def execute_model( self, - scheduler_output, + scheduler_output: SchedulerOutput, ) -> Union[ModelRunnerOutput, Future[ModelRunnerOutput]]: """Execute the model on the Ray workers. From a2e17ae846b1ee938174fd5a949e51f11bbba3c2 Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Wed, 27 Aug 2025 22:00:59 +0900 Subject: [PATCH 18/27] docs: type hint of prometheus WARNING - griffe: vllm/v1/metrics/prometheus.py:44: No type or annotation for returned value 'Registry' Signed-off-by: Zerohertz --- vllm/v1/metrics/prometheus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/v1/metrics/prometheus.py b/vllm/v1/metrics/prometheus.py index 61ba5d66cb31..a43cf9ce255e 100644 --- a/vllm/v1/metrics/prometheus.py +++ b/vllm/v1/metrics/prometheus.py @@ -36,7 +36,7 @@ def setup_multiprocess_prometheus(): "and vLLM will properly handle cleanup.") -def get_prometheus_registry(): +def get_prometheus_registry() -> CollectorRegistry: """Get the appropriate prometheus registry based on multiprocessing configuration. From 5b026241161cfc62889f227267181ef260df6241 Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Wed, 27 Aug 2025 22:03:10 +0900 Subject: [PATCH 19/27] docs: docstring in logits_processor interface WARNING - griffe: vllm/v1/sample/logits_processor/interface.py:83: Failed to get 'name: description' pair from 'batch_update is non-None iff there have been' WARNING - griffe: vllm/v1/sample/logits_processor/interface.py:84: Failed to get 'name: description' pair from 'changes to the batch makeup.' Signed-off-by: Zerohertz --- vllm/v1/sample/logits_processor/interface.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/v1/sample/logits_processor/interface.py b/vllm/v1/sample/logits_processor/interface.py index 12b4db24bff8..4d3aaf43411e 100644 --- a/vllm/v1/sample/logits_processor/interface.py +++ b/vllm/v1/sample/logits_processor/interface.py @@ -80,7 +80,7 @@ def update_state( to each forward pass. Args: - batch_update is non-None iff there have been - changes to the batch makeup. + batch_update: Non-None iff there have been changes + to the batch makeup. 
""" raise NotImplementedError From 596bb46e34cc6a7a985d1d0e0c3f96e147fbdeb7 Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Wed, 27 Aug 2025 22:06:38 +0900 Subject: [PATCH 20/27] docs: docstring in rejection_sampler WARNING - griffe: vllm/v1/sample/rejection_sampler.py:70: Parameter 'bonus_token_ids_tensor' does not appear in the function signature Signed-off-by: Zerohertz --- vllm/v1/sample/rejection_sampler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/v1/sample/rejection_sampler.py b/vllm/v1/sample/rejection_sampler.py index b2354c53302a..2d9ce3101b6c 100644 --- a/vllm/v1/sample/rejection_sampler.py +++ b/vllm/v1/sample/rejection_sampler.py @@ -68,7 +68,7 @@ def forward( different requests are flattened into a single tensor because this is the shape of the output logits. NOTE: `target_logits` can be updated in place to save memory. - bonus_token_ids_tensor (torch.Tensor): + bonus_token_ids (torch.Tensor): A tensor containing bonus tokens. Shape is [batch_size, 1]. Bonus tokens are added to the end of the sequence if all proposed tokens are accepted. We generate the bonus tokens From ca1a7292bb79951ace6dd906ab9a54c58989e083 Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Wed, 27 Aug 2025 22:07:23 +0900 Subject: [PATCH 21/27] docs: docstring in tpu sampler WARNING - griffe: vllm/v1/sample/tpu/sampler.py:91: No type or annotation for parameter 'logits' WARNING - griffe: vllm/v1/sample/tpu/sampler.py:91: Parameter 'logits' does not appear in the function signature Signed-off-by: Zerohertz --- vllm/v1/sample/tpu/sampler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/v1/sample/tpu/sampler.py b/vllm/v1/sample/tpu/sampler.py index 04545d587e4a..e84136e3a6d0 100644 --- a/vllm/v1/sample/tpu/sampler.py +++ b/vllm/v1/sample/tpu/sampler.py @@ -89,7 +89,7 @@ def gather_logprobs( Gather logprobs for topk and sampled/prompt token. Args: - logits: (num tokens) x (vocab) tensor + logprobs: (num tokens) x (vocab) tensor num_logprobs: minimum number of logprobs to retain per token token_ids: prompt tokens (if prompt logprobs) From fb487bde0475b9046e839d9e340f1b86fa92a9ee Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Wed, 27 Aug 2025 22:08:15 +0900 Subject: [PATCH 22/27] docs: indent in structured_output backend_types WARNING - griffe: vllm/v1/structured_output/backend_types.py:126: Confusing indentation for continuation line 5 in docstring, should be 4 * 2 = 8 spaces, not 6 WARNING - griffe: vllm/v1/structured_output/backend_types.py:112: Confusing indentation for continuation line 5 in docstring, should be 4 * 2 = 8 spaces, not 6 Signed-off-by: Zerohertz --- vllm/v1/structured_output/backend_types.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/v1/structured_output/backend_types.py b/vllm/v1/structured_output/backend_types.py index d500783aa4b3..9a53aa7a1ad1 100644 --- a/vllm/v1/structured_output/backend_types.py +++ b/vllm/v1/structured_output/backend_types.py @@ -110,7 +110,7 @@ def compile_grammar(self, request_type: StructuredOutputOptions, Args: request_type (StructuredOutputOptions): The type of structured - output request. + output request. grammar_spec (str): The grammar specification to compile. Returns: @@ -124,7 +124,7 @@ def allocate_token_bitmask(self, max_num_seqs: int) -> torch.Tensor: Args: max_num_seqs (int): The maximum number of sequences for which - to allocate the bitmask. + to allocate the bitmask. 
""" @abstractmethod From 36b703474ff1bde3df746de95f10c81396a5a8ad Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Wed, 27 Aug 2025 22:09:46 +0900 Subject: [PATCH 23/27] docs: docstring in worker gpu_input_batch WARNING - griffe: vllm/v1/worker/gpu_input_batch.py:529: No type or annotation for parameter 'empty_req_indices' WARNING - griffe: vllm/v1/worker/gpu_input_batch.py:529: Parameter 'empty_req_indices' does not appear in the function signature Signed-off-by: Zerohertz --- vllm/v1/worker/gpu_input_batch.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/vllm/v1/worker/gpu_input_batch.py b/vllm/v1/worker/gpu_input_batch.py index 284af6bfedce..f4c2f45df595 100644 --- a/vllm/v1/worker/gpu_input_batch.py +++ b/vllm/v1/worker/gpu_input_batch.py @@ -525,9 +525,6 @@ def condense(self) -> None: Any consecutive empty indices at the very end of the list are not filled. - Args: - empty_req_indices: empty indices which may be filled. - Returns: swaps: list of (from,to) swap tuples for moved requests empty_req_indices: indices not filled by condensation From 717c637b2df27a2241627186168e801f7315d464 Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Wed, 27 Aug 2025 22:11:01 +0900 Subject: [PATCH 24/27] docs: indent in worker gpu_model_runner WARNING - griffe: vllm/v1/worker/gpu_model_runner.py:2955: Failed to get 'name: description' pair from 'correct size but uninitialized shape.' Signed-off-by: Zerohertz --- vllm/v1/worker/gpu_model_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index d93460d618e7..ed2a54eebc98 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -2953,7 +2953,7 @@ def _reshape_kv_cache_tensors( Args: kv_cache_config: The KV cache config kv_cache_raw_tensors: The KV cache buffer of each layer, with - correct size but uninitialized shape. + correct size but uninitialized shape. Returns: Dict[str, torch.Tensor]: A map between layer names to their corresponding memory buffer for KV cache. 
From 85034bb06f1b3bce5d81d1cf0ec4cbbf2084e658 Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Wed, 27 Aug 2025 22:14:10 +0900 Subject: [PATCH 25/27] docs: indent in worker_base WARNING - griffe: vllm/v1/worker/worker_base.py:39: Failed to get 'name: description' pair from 'responsibilities' Signed-off-by: Zerohertz --- vllm/v1/worker/worker_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/v1/worker/worker_base.py b/vllm/v1/worker/worker_base.py index 9c93754f93f8..038ce4b54f96 100644 --- a/vllm/v1/worker/worker_base.py +++ b/vllm/v1/worker/worker_base.py @@ -36,8 +36,8 @@ def __init__( local_rank: Local device index rank: Global rank in distributed setup distributed_init_method: Distributed initialization method - is_driver_worker: Whether this worker handles driver - responsibilities + is_driver_worker: Whether this worker handles driver + responsibilities """ # Configuration storage super().__init__(vllm_config=vllm_config) From 8baf0cedec4a4fbd4316cb1246975f8866a42152 Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Wed, 27 Aug 2025 22:16:07 +0900 Subject: [PATCH 26/27] docs: indent in logits_processor interface Signed-off-by: Zerohertz --- vllm/v1/sample/logits_processor/interface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/v1/sample/logits_processor/interface.py b/vllm/v1/sample/logits_processor/interface.py index 4d3aaf43411e..c9c649705dfd 100644 --- a/vllm/v1/sample/logits_processor/interface.py +++ b/vllm/v1/sample/logits_processor/interface.py @@ -81,6 +81,6 @@ def update_state( Args: batch_update: Non-None iff there have been changes - to the batch makeup. + to the batch makeup. """ raise NotImplementedError From 427f4d885f7c2b7a34b39f05aa99b7bbcd13366f Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Wed, 27 Aug 2025 22:28:33 +0900 Subject: [PATCH 27/27] style: ruff Signed-off-by: Zerohertz --- vllm/v1/attention/backends/flash_attn.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/v1/attention/backends/flash_attn.py b/vllm/v1/attention/backends/flash_attn.py index 04c4b6c35a86..dd2b956d4fa3 100755 --- a/vllm/v1/attention/backends/flash_attn.py +++ b/vllm/v1/attention/backends/flash_attn.py @@ -438,7 +438,8 @@ def forward( query: shape = [num_tokens, num_heads, head_size] key: shape = [num_tokens, num_kv_heads, head_size] value: shape = [num_tokens, num_kv_heads, head_size] - kv_cache: shape = [2, num_blocks, block_size, num_kv_heads, head_size] + kv_cache: shape = + [2, num_blocks, block_size, num_kv_heads, head_size] attn_metadata: Metadata for attention. Returns: shape = [num_tokens, num_heads * head_size]