Remove BOS-stripping post-processing of the HF processor result

kylehh · kylehh · commit fe7fa1542a71 · 2025-02-28T09:27:15.000-08:00
Signed-off-by: Kyle Huang <kylhuang@nvidia.com>
diff --git a/vllm/model_executor/models/paligemma.py b/vllm/model_executor/models/paligemma.py
@@ -124,7 +124,7 @@ def _call_hf_processor(
             prompt_ids = tokenizer.encode(prompt)
             return BatchFeature(dict(input_ids=[prompt_ids]), tensor_type="pt")
 
-        processed_outputs = super()._call_hf_processor(
+        return super()._call_hf_processor(
             prompt=prompt,
             mm_data=mm_data,
             mm_kwargs=mm_kwargs,
@@ -134,10 +134,10 @@ def _call_hf_processor(
         # Otherwise it will fail the language feature
         # This is for Paligemma 1 model only (tokenizer.add_bos_token == True)
         # Paligemma2 does NOT have this problem (add_bos_token == False)
-        if processed_outputs["input_ids"][0][0] == tokenizer.bos_token_id:
-            prompt_ids_without_bos = processed_outputs["input_ids"][0][1:]
-            processed_outputs["input_ids"] = prompt_ids_without_bos[None, :]
-        return processed_outputs
+        # if processed_outputs["input_ids"][0][0] == tokenizer.bos_token_id:
+        #     prompt_ids_without_bos = processed_outputs["input_ids"][0][1:]
+        #     processed_outputs["input_ids"] = prompt_ids_without_bos[None, :]
+        # return processed_outputs
 
     def _get_mm_fields_config(
         self,
@@ -163,15 +163,15 @@ def _get_prompt_updates(
         assert isinstance(bos_token_id, int)
 
         # Paligemma 1 and 2 have different tokenizer.add_bos_token
-        # Replace <bos> with <image>*n + <bos> for Paligemma 1
+        # Replace <bos> with <bos> + <image>*n + <bos> for Paligemma 1
         # Insert <image>*n + <bos> for Paligemma 2
         if tokenizer.add_bos_token:
             return [
                 PromptReplacement(
                     modality="image",
                     target=[bos_token_id],
                     replacement=PromptUpdateDetails(
-                        full=image_tokens + [bos_token_id],
+                        full=[bos_token_id] + image_tokens + [bos_token_id],
                         features=image_tokens,
                     ),
                 )