
Commit de4c7ec

[VLM] Florence-2 supports online serving (vllm-project#16164)

Isotr0py authored and yangw-dev committed

Signed-off-by: Isotr0py <2037008807@qq.com>
Signed-off-by: Yang Wang <elainewy@meta.com>
1 parent d2a2541 commit de4c7ec

File tree: 3 files changed, +22 -3 lines changed

examples/template_florence2.jinja

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
+{%- for message in messages -%}
+{%- if message['role'] == 'user' -%}
+{{- message['content'] -}}
+{%- elif message['role'] == 'assistant' -%}
+{{- message['content'] -}}
+{%- endif -%}
+{%- endfor -%}
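
The template simply concatenates user and assistant message contents, which matches Florence-2's plain task-token prompts. Below is a minimal sketch of an online-serving request against a server started with this template; the checkpoint name, port, and image URL are illustrative, not prescribed by this commit.

# Assumes a server launched along the lines of:
#   vllm serve microsoft/Florence-2-large \
#       --chat-template examples/template_florence2.jinja
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="microsoft/Florence-2-large",
    messages=[{
        "role": "user",
        "content": [
            # Florence-2 takes task tokens such as <CAPTION> as the text prompt.
            {"type": "text", "text": "<CAPTION>"},
            {"type": "image_url",
             "image_url": {"url": "https://example.com/cat.jpg"}},
        ],
    }],
)
print(response.choices[0].message.content)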

vllm/entrypoints/chat_utils.py

Lines changed: 2 additions & 2 deletions
@@ -487,8 +487,8 @@ def _placeholder_str(self, modality: ModalityStr,
             return "<|endoftext10|>"  # 200010 (see vocab.json in hf model)
         if model_type in ("minicpmo", "minicpmv"):
             return "(<image>./</image>)"
-        if model_type in ("blip-2", "fuyu", "paligemma", "pixtral",
-                          "mistral3"):
+        if model_type in ("blip-2", "florence2", "fuyu", "paligemma",
+                          "pixtral", "mistral3"):
             # These models do not use image tokens in the prompt
             return None
         if model_type == "qwen":
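
For context, _placeholder_str decides which token, if any, is spliced into the text prompt for each multimodal item; returning None means the image is handed to the model's HF processor without a textual placeholder. The toy version below illustrates that dispatch, and is not vLLM's actual code.

def placeholder_for(model_type: str) -> str | None:
    # Models listed here consume the raw image via their HF processor,
    # so no image token is inserted into the text prompt.
    if model_type in ("blip-2", "florence2", "fuyu", "paligemma",
                      "pixtral", "mistral3"):
        return None
    # Other model families fall through to model-specific tokens.
    return "<image>"

assert placeholder_for("florence2") is None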

vllm/model_executor/models/florence2.py

Lines changed: 13 additions & 1 deletion
@@ -10,7 +10,7 @@
 import torch.nn as nn
 import torch.nn.functional as F
 from einops import rearrange
-from transformers import BatchFeature, PretrainedConfig
+from transformers import BartTokenizer, BatchFeature, PretrainedConfig
 
 from vllm.config import VllmConfig
 from vllm.model_executor.layers.logits_processor import LogitsProcessor

@@ -826,6 +826,18 @@ def create_decoder_prompt(
     ) -> Union[str, list[int]]:
         return [self.info.get_hf_config().eos_token_id]
 
+    def _apply_hf_processor_tokens_only(
+        self,
+        prompt_tokens: list[int],
+    ) -> list[int]:
+        hf_processor = self.info.get_hf_processor()
+        tokenizer: BartTokenizer = hf_processor.tokenizer
+        prompt_text = tokenizer.decode(prompt_tokens)
+        # convert task tokens to prompt
+        prompt_text = hf_processor._construct_prompts([prompt_text])[0]
+        prompt_tokens = tokenizer.encode(prompt_text, add_special_tokens=False)
+        return prompt_tokens
+
     def _call_hf_processor(
         self,
         prompt: str,
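
The new _apply_hf_processor_tokens_only round-trips token IDs through text so that Florence-2 task tokens are expanded into the full natural-language prompts the HF processor expects. Here is a standalone sketch of that decode, expand, re-encode flow, assuming the Hugging Face Florence-2 processor (which requires trust_remote_code) and its private _construct_prompts helper; the checkpoint name is illustrative.

from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained(
    "microsoft/Florence-2-large", trust_remote_code=True)
tokenizer = processor.tokenizer

# Token IDs as they arrive from the engine, e.g. an encoded task token.
prompt_tokens = tokenizer.encode("<CAPTION>", add_special_tokens=False)

# 1. Recover the text form of the prompt.
prompt_text = tokenizer.decode(prompt_tokens)
# 2. Expand task tokens like <CAPTION> into full prompts,
#    e.g. "What does the image describe?".
prompt_text = processor._construct_prompts([prompt_text])[0]
# 3. Re-encode the expanded prompt for the downstream HF processor.
prompt_tokens = tokenizer.encode(prompt_text, add_special_tokens=False)
print(prompt_text, prompt_tokens)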
