Commit ce4d0af

align tokenizer interface

Signed-off-by: Keyun Tong <tongkeyun@gmail.com>
youngkent committed Jan 29, 2025
1 parent b6c37e3 commit ce4d0af
Showing 7 changed files with 28 additions and 14 deletions.
4 changes: 1 addition & 3 deletions vllm/entrypoints/llm.py
@@ -1105,9 +1105,7 @@ def ensure_str(prompt: SingletonPrompt):
         parsed_prompts = []

         for q, t in input_pairs:
-            prompt_inputs = tokenizer(text=q,
-                                      text_pair=t,
-                                      **tokenization_kwargs)
+            prompt_inputs = tokenizer(q, text_pair=t, **tokenization_kwargs)
             engine_prompt = TokensPrompt(
                 prompt_token_ids=prompt_inputs["input_ids"],
                 token_type_ids=prompt_inputs.get("token_type_ids"))
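
This call-site change is the crux of the alignment: `text=` is the Hugging Face keyword name, while vLLM's `TokenizerBase.__call__` names its first parameter `prompt`, so only a positional first argument works across both families. A minimal runnable sketch of the idea, using a hypothetical stub rather than a real tokenizer:

from typing import List, Optional

class StubTokenizer:
    """Hypothetical stand-in mirroring the aligned __call__ signature."""

    def __call__(self,
                 prompt: str,
                 text_pair: Optional[str] = None,
                 add_special_tokens: bool = False,
                 truncation: bool = False,
                 max_length: Optional[int] = None) -> dict:
        ids: List[int] = [ord(c) for c in prompt]
        if text_pair is not None:
            ids += [0] + [ord(c) for c in text_pair]  # 0 as a toy separator id
        return {"input_ids": ids}

tokenizer = StubTokenizer()
# A positional first argument works whether the parameter is named
# `text` (HF) or `prompt` (TokenizerBase); `text=q` would not.
prompt_inputs = tokenizer("query", text_pair="document")
print(prompt_inputs["input_ids"])
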
3 changes: 1 addition & 2 deletions vllm/entrypoints/openai/serving_engine.py
@@ -398,8 +398,7 @@ async def _preprocess_chat(
         _chat_template_kwargs.update(chat_template_kwargs or {})

         request_prompt: Union[str, List[int]]
-        is_mistral_tokenizer = isinstance(tokenizer, MistralTokenizer)
-        if is_mistral_tokenizer:
+        if isinstance(tokenizer, MistralTokenizer):
             request_prompt = apply_mistral_chat_template(
                 tokenizer,
                 messages=messages,
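
Beyond brevity, inlining the check can matter for static analysis: some type checkers (mypy among them) narrow the type of `tokenizer` inside the branch only when the `isinstance` call appears directly in the `if` condition, not through an intermediate boolean. A small sketch with hypothetical stand-in classes:

from typing import Union

class MistralLike:
    def apply_chat_template(self, messages: list) -> str:
        return "prompt"

class HFLike:
    def encode(self, text: str) -> list:
        return [ord(c) for c in text]

def preprocess(tokenizer: Union[MistralLike, HFLike]) -> None:
    if isinstance(tokenizer, MistralLike):
        # The checker narrows `tokenizer` to MistralLike here, so the
        # Mistral-only method is known to exist.
        print(tokenizer.apply_chat_template([]))
    else:
        print(tokenizer.encode("hello"))

preprocess(MistralLike())
preprocess(HFLike())
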
2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/serving_score.py
@@ -119,7 +119,7 @@ async def create_score(

         tokenize_async = make_async(tokenizer.__call__,
                                     executor=self._tokenizer_executor)
-        prompt_inputs = await tokenize_async(text=q,
+        prompt_inputs = await tokenize_async(q,
                                              text_pair=t,
                                              **tokenization_kwargs)
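
`make_async` offloads the blocking `tokenizer.__call__` onto an executor so the event loop is not stalled; the fix here only changes the first argument from `text=q` to positional `q`, mirroring llm.py. A self-contained sketch of the same offloading pattern using the standard library (the `tokenize` function is a stand-in for the real tokenizer):

import asyncio
from concurrent.futures import ThreadPoolExecutor
from functools import partial

def tokenize(q: str, text_pair: str = "") -> dict:
    # Stand-in for a blocking tokenizer.__call__.
    return {"input_ids": [ord(c) for c in q + text_pair]}

async def main() -> None:
    loop = asyncio.get_running_loop()
    executor = ThreadPoolExecutor(max_workers=1)
    # run_in_executor forwards only positional arguments, so keywords
    # must be bound with functools.partial first.
    prompt_inputs = await loop.run_in_executor(
        executor, partial(tokenize, "query", text_pair="document"))
    print(prompt_inputs["input_ids"])

asyncio.run(main())
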
2 changes: 1 addition & 1 deletion vllm/logits_process.py
@@ -29,7 +29,7 @@ def get_bad_words_logits_processors(

             if isinstance(tokenizer, MistralTokenizer):
                 # Mistral tokenizers should not add special tokens
-                prompt_token_ids = tokenizer.encode(prompt=prompt)
+                prompt_token_ids = tokenizer.encode(text=prompt)
             else:
                 prompt_token_ids = tokenizer.encode(text=prompt,
                                                     add_special_tokens=False)
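
With `prompt=` renamed to `text=`, both branches now address the first parameter by the same keyword; only the `add_special_tokens` flag differs, because Mistral tokenizers manage special tokens themselves. A toy sketch of the now-uniform call shape (both classes are illustrative, not vLLM's):

from typing import List, Optional

class MistralLikeTokenizer:
    def encode(self, text: str,
               add_special_tokens: Optional[bool] = None) -> List[int]:
        # Special tokens are handled internally; the flag is ignored.
        return [ord(c) for c in text]

class HFLikeTokenizer:
    def encode(self, text: str, add_special_tokens: bool = True) -> List[int]:
        ids = [ord(c) for c in text]
        return [101] + ids + [102] if add_special_tokens else ids

print(MistralLikeTokenizer().encode(text="bad word"))
print(HFLikeTokenizer().encode(text="bad word", add_special_tokens=False))
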
2 changes: 2 additions & 0 deletions vllm/transformers_utils/tokenizer.py
@@ -183,6 +183,8 @@ def get_tokenizer(
             'encoding and decoding.',
             FutureWarning,
             stacklevel=2)
+
+    tokenizer: AnyTokenizer
     if tokenizer_mode == "mistral":
         tokenizer = MistralTokenizer.from_pretrained(str(tokenizer_name),
                                                      revision=revision)
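
The bare `tokenizer: AnyTokenizer` annotation exists for the type checker: without it, mypy pins the variable to the type of its first assignment (`MistralTokenizer`) and rejects the other branch. A reduced sketch of the pattern with hypothetical classes:

from typing import Union

class MistralTok: ...
class CachedTok: ...

AnyTok = Union[MistralTok, CachedTok]

def get_tok(mode: str) -> AnyTok:
    tok: AnyTok  # declare the union up front so both branches type-check
    if mode == "mistral":
        tok = MistralTok()
    else:
        tok = CachedTok()
    return tok

print(type(get_tok("mistral")).__name__)
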
14 changes: 11 additions & 3 deletions vllm/transformers_utils/tokenizer_base.py
@@ -48,6 +48,11 @@ def vocab_size(self) -> int:
     def max_token_id(self) -> int:
         raise NotImplementedError()

+    @property
+    @abstractmethod
+    def sep_token(self) -> int:
+        raise NotImplementedError()
+
     @abstractmethod
     def __len__(self) -> int:
         raise NotImplementedError()
@@ -56,6 +61,7 @@ def __len__(self) -> int:
     def __call__(
         self,
         prompt: Union[str, List[str], List[int]],
+        text_pair: Optional[str] = None,
         add_special_tokens: bool = False,
         truncation: bool = False,
         max_length: Optional[int] = None,
@@ -73,14 +79,16 @@ def get_added_vocab(self) -> Dict[str, int]:
     @abstractmethod
     def encode_one(
         self,
-        prompt: str,
+        text: str,
         truncation: bool = False,
         max_length: Optional[int] = None,
     ) -> List[int]:
         raise NotImplementedError()

     @abstractmethod
-    def encode(self, prompt: str) -> List[int]:
+    def encode(self,
+               text: str,
+               add_special_tokens: Optional[bool] = None) -> List[int]:
         raise NotImplementedError()

     @abstractmethod
@@ -114,7 +122,7 @@ class TokenizerRegistry:
     REGISTRY: Dict[str, Tuple[str, str]] = {}

     @staticmethod
-    def register(name: str, module: str, class_name: str) -> TokenizerBase:
+    def register(name: str, module: str, class_name: str) -> None:
         TokenizerRegistry.REGISTRY[name] = (module, class_name)

     @staticmethod
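
Together these changes make `TokenizerBase` mirror the Hugging Face surface (`text`, `text_pair`, `add_special_tokens`) closely enough that call sites no longer need per-family keywords, and they correct the annotation on `register`, which never returned a tokenizer. A minimal sketch of a custom tokenizer written against the updated abstract surface; `WhitespaceTokenizer` is hypothetical, not part of vLLM:

from typing import List, Optional

class WhitespaceTokenizer:  # would subclass TokenizerBase in vLLM
    @property
    def sep_token(self) -> int:
        raise NotImplementedError()  # no separator token in this toy

    def encode(self,
               text: str,
               add_special_tokens: Optional[bool] = None) -> List[int]:
        # The flag is accepted to satisfy the shared signature even
        # though this toy tokenizer has no special tokens to add.
        vocab: dict = {}
        return [vocab.setdefault(w, len(vocab)) for w in text.split()]

print(WhitespaceTokenizer().encode(text="align the tokenizer interface"))
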
15 changes: 11 additions & 4 deletions vllm/transformers_utils/tokenizers/mistral.py
@@ -220,12 +220,17 @@ def vocab_size(self) -> int:
     def max_token_id(self) -> int:
         return self._max_token_id

+    @property
+    def sep_token(self) -> int:
+        raise NotImplementedError()
+
     def __len__(self) -> int:
         return self.vocab_size

     def __call__(
         self,
         prompt: Union[str, List[str], List[int]],
+        text_pair: Optional[str] = None,
         add_special_tokens: bool = False,
         truncation: bool = False,
         max_length: Optional[int] = None,
@@ -257,22 +262,24 @@ def get_added_vocab(self) -> Dict[str, int]:

     def encode_one(
         self,
-        prompt: str,
+        text: str,
         truncation: bool = False,
         max_length: Optional[int] = None,
     ) -> List[int]:
         # Mistral Tokenizers should not add special tokens
-        input_ids = self.encode(prompt)
+        input_ids = self.encode(text)

         if truncation:
             input_ids = input_ids[:max_length]
         return input_ids

-    def encode(self, prompt: str) -> List[int]:
+    def encode(self,
+               text: str,
+               add_special_tokens: Optional[bool] = None) -> List[int]:
         # `encode` should only be used for prompt completion
         # it should never be used for chat_completion.
         # For chat completion use `apply_chat_template`
-        return self.tokenizer.encode(prompt, bos=True, eos=False)
+        return self.tokenizer.encode(text, bos=True, eos=False)

     def apply_chat_template(self,
                             messages: List["ChatCompletionMessageParam"],
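
Two details are worth noting: `sep_token` is left unimplemented because Mistral models define no SEP token, and `encode` accepts `add_special_tokens` purely for signature compatibility while always emitting BOS and never EOS. A sketch of how a caller might tolerate the unimplemented property (the class is a stand-in, not vLLM's):

class MistralLike:
    @property
    def sep_token(self) -> int:
        raise NotImplementedError()

    def encode(self, text: str, add_special_tokens=None) -> list:
        # BOS (id 1) is always prepended, mirroring bos=True, eos=False.
        return [1] + [ord(c) for c in text]

tok = MistralLike()
try:
    sep = tok.sep_token
except NotImplementedError:
    sep = None  # fall back when the tokenizer defines no SEP token
print(sep, tok.encode("hi"))
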
