     CommonAttentionMetadata,
 )
 from vllm.v1.kv_cache_interface import KVCacheConfig
+from vllm.v1.outputs import TokenIDs, convert_to_token_id_list, get_token_count
 from vllm.v1.sample.metadata import SamplingMetadata
 from vllm.v1.spec_decode.metadata import SpecDecodeMetadata
 from vllm.v1.utils import CpuGpuBuffer
@@ -475,7 +476,7 @@ def propose(
 
     def prepare_next_token_ids_cpu(
         self,
-        sampled_token_ids: list[list[int]],
+        sampled_token_ids: list[TokenIDs],
         requests: dict[str, CachedRequestState],
         gpu_input_batch: InputBatch,
         num_scheduled_tokens: dict[str, int],
@@ -490,6 +491,7 @@ def prepare_next_token_ids_cpu(
         req_ids = gpu_input_batch.req_ids
         next_token_ids: list[int] = []
         for i, token_ids in enumerate(sampled_token_ids):
+            token_ids = convert_to_token_id_list(token_ids)
             if token_ids:
                 # Common case.
                 next_token_id = token_ids[-1]
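
The helpers above are only imported in this diff, so their definitions are not visible here. The following is a minimal sketch of plausible definitions, assuming TokenIDs unions a plain Python list with a torch.Tensor; the actual code in vllm/v1/outputs.py may differ.

    # Hedged sketch only: assumes TokenIDs is list[int] | torch.Tensor.
    # The real definitions live in vllm/v1/outputs.py and may differ.
    from typing import Union

    import torch

    TokenIDs = Union[list[int], torch.Tensor]  # assumed alias


    def convert_to_token_id_list(token_ids: TokenIDs) -> list[int]:
        # Normalize either representation to a plain Python list so that
        # CPU-side code such as prepare_next_token_ids_cpu can index it.
        if isinstance(token_ids, torch.Tensor):
            return token_ids.tolist()
        return token_ids


    def get_token_count(token_ids: TokenIDs) -> int:
        # len() works on both lists and 1-D tensors, so callers can count
        # sampled tokens without materializing a tensor as a list first.
        return len(token_ids)
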
@@ -807,7 +809,7 @@ def propose_tree(
     def prepare_inputs(
         self,
         common_attn_metadata: CommonAttentionMetadata,
-        sampled_token_ids: list[list[int]],
+        sampled_token_ids: list[TokenIDs],
         num_draft_tokens: list[int],
     ) -> tuple[CommonAttentionMetadata, torch.Tensor]:
         """
@@ -833,7 +835,7 @@ def prepare_inputs(
         # q1 + q2, q1 + q2 + 1, ..., q1 + q2 + q3 - n3 - 1]
 
         num_rejected_tokens = [
-            n + 1 - len(sampled_token_ids[i]) if n > 0 else 0
+            n + 1 - get_token_count(sampled_token_ids[i]) if n > 0 else 0
             for i, n in enumerate(num_draft_tokens)
         ]
         num_rejected_tokens = torch.tensor(num_rejected_tokens, dtype=torch.int32)
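
As a sanity check on the arithmetic in that hunk: with n draft tokens a request can yield at most n + 1 sampled tokens (the accepted drafts plus one bonus token), so n + 1 minus the sampled count is the number of rejected tokens. A small illustrative example (values invented, not from a real run):

    # Worked example of the num_rejected_tokens computation above.
    num_draft_tokens = [3, 0, 2]
    # Request 0: 3 drafts, 2 tokens sampled -> 3 + 1 - 2 = 2 rejected.
    # Request 1: no drafts (n == 0)         -> 0 rejected by definition.
    # Request 2: 2 drafts, all accepted plus bonus -> 2 + 1 - 3 = 0 rejected.
    sampled_counts = [2, 1, 3]
    num_rejected = [
        n + 1 - sampled_counts[i] if n > 0 else 0
        for i, n in enumerate(num_draft_tokens)
    ]
    assert num_rejected == [2, 0, 0]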