diff --git a/examples/offline_inference/encoder_decoder_multimodal.py b/examples/offline_inference/encoder_decoder_multimodal.py
index 61e5f5eae4ef..2883c37ca236 100644
--- a/examples/offline_inference/encoder_decoder_multimodal.py
+++ b/examples/offline_inference/encoder_decoder_multimodal.py
@@ -22,7 +22,7 @@ class ModelRequestData(NamedTuple):
 def run_florence2():
     engine_args = EngineArgs(
         model="microsoft/Florence-2-large",
-        tokenizer="facebook/bart-large",
+        tokenizer="Isotr0py/Florence-2-tokenizer",
         max_num_seqs=8,
         trust_remote_code=True,
         limit_mm_per_prompt={"image": 1},
@@ -165,6 +165,7 @@ def main(args):
         temperature=0,
         top_p=1.0,
         max_tokens=64,
+        skip_special_tokens=False,
     )

     start = time.time()
diff --git a/examples/offline_inference/vision_language.py b/examples/offline_inference/vision_language.py
index 6b533346ac31..4476009fd271 100644
--- a/examples/offline_inference/vision_language.py
+++ b/examples/offline_inference/vision_language.py
@@ -150,7 +150,7 @@ def run_florence2(questions: list[str], modality: str) -> ModelRequestData:

     engine_args = EngineArgs(
         model="microsoft/Florence-2-large",
-        tokenizer="facebook/bart-large",
+        tokenizer="Isotr0py/Florence-2-tokenizer",
         max_model_len=4096,
         max_num_seqs=2,
         trust_remote_code=True,
diff --git a/tests/conftest.py b/tests/conftest.py
index d272f448f61f..25e70319e2cc 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -925,6 +925,7 @@ def generate_encoder_decoder_greedy_logprobs(
         max_tokens: int,
         num_logprobs: int,
         num_prompt_logprobs: Optional[int] = None,
+        skip_special_tokens: bool = True,
     ) -> Union[list[TokensTextLogprobs],
                list[TokensTextLogprobsPromptLogprobs]]:
         greedy_logprobs_params = SamplingParams(
@@ -932,6 +933,7 @@ def generate_encoder_decoder_greedy_logprobs(
             max_tokens=max_tokens,
             logprobs=num_logprobs,
             prompt_logprobs=(num_prompt_logprobs),
+            skip_special_tokens=skip_special_tokens,
         )
         '''
         Greedy logprobs generation for vLLM encoder/decoder models
diff --git a/tests/models/encoder_decoder/vision_language/test_florence2.py b/tests/models/encoder_decoder/vision_language/test_florence2.py
index a6ec333e2e9b..14b64393bf52 100644
--- a/tests/models/encoder_decoder/vision_language/test_florence2.py
+++ b/tests/models/encoder_decoder/vision_language/test_florence2.py
@@ -13,12 +13,12 @@
 from ...utils import check_logprobs_close

 MODELS = ["microsoft/Florence-2-base"]
-# Florence-2 uses BartFastTokenizer which can't be loaded from AutoTokenizer
-# Therefore, we borrow the BartTokenizer from the original Bart model
-TOKENIZER = "facebook/bart-base"
+# Florence-2 model repo's tokenizer config is missing some special tokens.
+# Therefore, we use a converted tokenizer from a forked repo
+TOKENIZER = "Isotr0py/Florence-2-tokenizer"
 HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
     "stop_sign":
     "
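
Not part of the diff itself: a minimal sketch of why the tokenizer swap and `skip_special_tokens=False` go together. It assumes the converted `Isotr0py/Florence-2-tokenizer` registers Florence-2 task prompts such as `<CAPTION>` as special tokens, so they are only visible in decoded output when special tokens are kept.

```python
# Sketch only (not part of this PR); assumes "<CAPTION>" is registered as a
# special token in the converted tokenizer repo.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Isotr0py/Florence-2-tokenizer")

ids = tok("<CAPTION>").input_ids
# With special tokens kept, the task token round-trips intact;
# with skip_special_tokens=True it would be stripped from the output.
print(tok.decode(ids, skip_special_tokens=False))
print(tok.decode(ids, skip_special_tokens=True))
```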