3 changes: 2 additions & 1 deletion examples/offline_inference/encoder_decoder_multimodal.py

@@ -22,7 +22,7 @@ class ModelRequestData(NamedTuple):
 def run_florence2():
     engine_args = EngineArgs(
         model="microsoft/Florence-2-large",
-        tokenizer="facebook/bart-large",
+        tokenizer="Isotr0py/Florence-2-tokenizer",
         max_num_seqs=8,
         trust_remote_code=True,
         limit_mm_per_prompt={"image": 1},
@@ -165,6 +165,7 @@ def main(args):
         temperature=0,
         top_p=1.0,
         max_tokens=64,
+        skip_special_tokens=False,
     )

     start = time.time()
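
For orientation, a minimal sketch of what the updated example now configures. This is an illustrative reconstruction, not the example file itself: it assumes vLLM's offline LLM API, and the example's prompt/image plumbing is omitted.

    # Sketch: Florence-2 with the converted tokenizer, keeping special
    # tokens in the decoded output.
    from vllm import LLM, SamplingParams

    llm = LLM(
        model="microsoft/Florence-2-large",
        tokenizer="Isotr0py/Florence-2-tokenizer",  # converted tokenizer from this PR
        max_num_seqs=8,
        trust_remote_code=True,
        limit_mm_per_prompt={"image": 1},
    )

    # Florence-2 task outputs (e.g. for <OD>) contain special tokens,
    # so detokenization must not strip them.
    sampling_params = SamplingParams(
        temperature=0,
        top_p=1.0,
        max_tokens=64,
        skip_special_tokens=False,
    )
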
2 changes: 1 addition & 1 deletion examples/offline_inference/vision_language.py

@@ -150,7 +150,7 @@ def run_florence2(questions: list[str], modality: str) -> ModelRequestData:

     engine_args = EngineArgs(
         model="microsoft/Florence-2-large",
-        tokenizer="facebook/bart-large",
+        tokenizer="Isotr0py/Florence-2-tokenizer",
         max_model_len=4096,
         max_num_seqs=2,
         trust_remote_code=True,

2 changes: 2 additions & 0 deletions tests/conftest.py

@@ -925,13 +925,15 @@ def generate_encoder_decoder_greedy_logprobs(
         max_tokens: int,
         num_logprobs: int,
         num_prompt_logprobs: Optional[int] = None,
+        skip_special_tokens: bool = True,
     ) -> Union[list[TokensTextLogprobs],
                list[TokensTextLogprobsPromptLogprobs]]:
         greedy_logprobs_params = SamplingParams(
             temperature=0.0,
             max_tokens=max_tokens,
             logprobs=num_logprobs,
             prompt_logprobs=(num_prompt_logprobs),
+            skip_special_tokens=skip_special_tokens,
         )
         '''
         Greedy logprobs generation for vLLM encoder/decoder models
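
The new parameter defaults to True, so existing callers of this helper keep their current behavior; a test that needs the raw special tokens opts out explicitly. A hedged usage sketch (fixture and argument values are illustrative, following how the tests below call it):

    # Sketch: opting out of special-token stripping in a test.
    outputs = vllm_model.generate_encoder_decoder_greedy_logprobs(
        prompts,
        max_tokens=64,
        num_logprobs=5,
        skip_special_tokens=False,  # keep Florence-2 task tokens in the text
    )
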
17 changes: 10 additions & 7 deletions tests/models/encoder_decoder/vision_language/test_florence2.py

@@ -13,12 +13,12 @@
 from ...utils import check_logprobs_close

 MODELS = ["microsoft/Florence-2-base"]
-# Florence-2 uses BartFastTokenizer which can't be loaded from AutoTokenizer
-# Therefore, we borrow the BartTokenizer from the original Bart model
-TOKENIZER = "facebook/bart-base"
+# Florence-2 model repo's tokenizer config is missing some special tokens.
+# Therefore, we use a converted tokenizer from a forked repo
+TOKENIZER = "Isotr0py/Florence-2-tokenizer"
 HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
     "stop_sign":
-    "<CAPTION>",  # special task token
+    "<OD>",  # special task token which will output special tokens
     "cherry_blossom":
     "Describe in detail what is shown in the image.",
 })

@@ -45,7 +45,6 @@ def hf_to_vllm_output(hf_output: tuple[list[int], str,
     output_ids, output_str, out_logprobs = hf_output

     output_str = output_str.replace("</s>", "").replace("<s>", "")
-    output_ids = [ids for ids in output_ids if ids not in [0, 2]]

     return output_ids, output_str, out_logprobs

@@ -71,8 +70,11 @@ def run_test(
                     enforce_eager=True) as vllm_model:
         vllm_outputs_per_case = [
             vllm_model.generate_encoder_decoder_greedy_logprobs(
-                prompts, max_tokens, num_logprobs=num_logprobs)
-            for prompts in inputs
+                prompts,
+                max_tokens,
+                num_logprobs=num_logprobs,
+                skip_special_tokens=False,
+            ) for prompts in inputs
         ]

         hf_inputs = [get_hf_images_prompts(prompts) for prompts in inputs]

@@ -93,6 +95,7 @@
         outputs_1_lst=vllm_outputs,
         name_0="hf",
         name_1="vllm",
+        num_outputs_0_skip_tokens=1,
     )
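
Why these two knobs go together: with skip_special_tokens=False the vLLM side now retains Florence-2's special tokens, so the HF ids no longer need the manual [0, 2] (BOS/EOS) filtering; and because the HF generate output still carries one leading token that vLLM does not emit, num_outputs_0_skip_tokens=1 tells check_logprobs_close to drop it from the first (HF) output list before comparing. A toy illustration with made-up token ids:

    # Toy illustration (made-up ids): outputs_0 is the HF side, outputs_1 vLLM.
    hf_output_ids = [2, 50265, 100, 200, 2]   # leading decoder-start token
    vllm_output_ids = [50265, 100, 200, 2]
    num_outputs_0_skip_tokens = 1
    assert hf_output_ids[num_outputs_0_skip_tokens:] == vllm_output_ids
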
2 changes: 1 addition & 1 deletion tests/models/registry.py

@@ -366,7 +366,7 @@ def check_available_online(
     # Florence-2 uses BartFastTokenizer which can't be loaded from AutoTokenizer
     # Therefore, we borrow the BartTokenizer from the original Bart model
     "Florence2ForConditionalGeneration": _HfExamplesInfo("microsoft/Florence-2-base",  # noqa: E501
-                                                         tokenizer="facebook/bart-base",
+                                                         tokenizer="Isotr0py/Florence-2-tokenizer",
                                                          trust_remote_code=True),  # noqa: E501
 "MllamaForConditionalGeneration": _HfExamplesInfo("meta-llama/Llama-3.2-11B-Vision-Instruct"),  # noqa: E501
 "WhisperForConditionalGeneration": _HfExamplesInfo("openai/whisper-large-v3"),  # noqa: E501
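
As a sanity check, the converted tokenizer should load directly and expose Florence-2's task tokens as real vocabulary entries. A hedged sketch; that the forked repo registers <OD> and <CAPTION> as special tokens is an assumption based on the test changes above:

    # Sketch: verify the converted tokenizer loads and knows the task tokens.
    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("Isotr0py/Florence-2-tokenizer")
    for token in ("<OD>", "<CAPTION>"):
        token_id = tok.convert_tokens_to_ids(token)
        # A real special token maps to a dedicated id, not the unknown id.
        assert token_id != tok.unk_token_id, f"{token} missing from tokenizer"
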