|
160 | 160 | ), |
161 | 161 | "aya_vision": VLMTestInfo( |
162 | 162 | models=["CohereForAI/aya-vision-8b"], |
163 | | - test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE), |
| 163 | + test_type=(VLMTestType.IMAGE), |
164 | 164 | prompt_formatter=lambda img_prompt: f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{img_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", # noqa: E501 |
165 | 165 | single_image_prompts=IMAGE_ASSETS.prompts({ |
166 | 166 | "stop_sign": "<image>What's the content in the center of the image?", # noqa: E501 |
167 | 167 | "cherry_blossom": "<image>What is the season?", # noqa: E501 |
168 | 168 | }), |
169 | 169 | multi_image_prompt="<image><image>Describe the two images in detail.", # noqa: E501 |
170 | | - max_model_len=8192, |
| 170 | + max_model_len=4096, |
171 | 171 | max_num_seqs=2, |
172 | 172 | auto_cls=AutoModelForImageTextToText, |
173 | | - vllm_runner_kwargs={"mm_processor_kwargs": {"crop_to_patches": True}} |
| 173 | + vllm_runner_kwargs={"mm_processor_kwargs": {"crop_to_patches": True}}, |
| 174 | + ), |
| 175 | + "aya_vision-multi_image": VLMTestInfo( |
| 176 | + models=["CohereForAI/aya-vision-8b"], |
| 177 | + test_type=(VLMTestType.MULTI_IMAGE), |
| 178 | + prompt_formatter=lambda img_prompt: f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{img_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", # noqa: E501 |
| 179 | + single_image_prompts=IMAGE_ASSETS.prompts({ |
| 180 | + "stop_sign": "<image>What's the content in the center of the image?", # noqa: E501 |
| 181 | + "cherry_blossom": "<image>What is the season?", # noqa: E501 |
| 182 | + }), |
| 183 | + multi_image_prompt="<image><image>Describe the two images in detail.", # noqa: E501 |
| 184 | + max_model_len=4096, |
| 185 | + max_num_seqs=2, |
| 186 | + auto_cls=AutoModelForImageTextToText, |
| 187 | + vllm_runner_kwargs={"mm_processor_kwargs": {"crop_to_patches": True}}, |
| 188 | + marks=[large_gpu_mark(min_gb=32)], |
174 | 189 | ), |
175 | 190 | "blip2": VLMTestInfo( |
176 | 191 | # TODO: Change back to 2.7b once head_dim = 80 is supported |
|
0 commit comments