30 changes: 24 additions & 6 deletions examples/offline_inference/vision_language_multi_image.py
@@ -22,6 +22,16 @@
 IMAGE_URLS = [
     "https://upload.wikimedia.org/wikipedia/commons/d/da/2015_Kaczka_krzy%C5%BCowka_w_wodzie_%28samiec%29.jpg",
     "https://upload.wikimedia.org/wikipedia/commons/7/77/002_The_lion_king_Snyggve_in_the_Serengeti_National_Park_Photo_by_Giles_Laurent.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/2/26/Ultramarine_Flycatcher_%28Ficedula_superciliaris%29_Naggar%2C_Himachal_Pradesh%2C_2013_%28cropped%29.JPG",
+    "https://upload.wikimedia.org/wikipedia/commons/thumb/e/e5/Anim1754_-_Flickr_-_NOAA_Photo_Library_%281%29.jpg/2560px-Anim1754_-_Flickr_-_NOAA_Photo_Library_%281%29.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/d/d4/Starfish%2C_Caswell_Bay_-_geograph.org.uk_-_409413.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/6/69/Grapevinesnail_01.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/thumb/0/0b/Texas_invasive_Musk_Thistle_1.jpg/1920px-Texas_invasive_Musk_Thistle_1.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/thumb/7/7a/Huskiesatrest.jpg/2880px-Huskiesatrest.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/thumb/6/68/Orange_tabby_cat_sitting_on_fallen_leaves-Hisashi-01A.jpg/1920px-Orange_tabby_cat_sitting_on_fallen_leaves-Hisashi-01A.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/3/30/George_the_amazing_guinea_pig.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/thumb/1/1f/Oryctolagus_cuniculus_Rcdo.jpg/1920px-Oryctolagus_cuniculus_Rcdo.jpg",
+    "https://upload.wikimedia.org/wikipedia/commons/9/98/Horse-and-pony.jpg",
 ]


@@ -285,8 +295,7 @@ def load_llama4(question: str, image_urls: list[str]) -> ModelRequestData:
 
     engine_args = EngineArgs(
         model=model_name,
-        max_model_len=8192,
-        max_num_seqs=4,
+        max_model_len=131072,
         tensor_parallel_size=8,
         limit_mm_per_prompt={"image": len(image_urls)},
     )
@@ -660,7 +669,7 @@ def run_generate(model, question: str, image_urls: list[str],
         llm.llm_engine.add_lora(lora_request=lora_request)
 
     sampling_params = SamplingParams(temperature=0.0,
-                                     max_tokens=128,
+                                     max_tokens=256,
                                      stop_token_ids=req_data.stop_token_ids)
 
     outputs = llm.generate(
@@ -694,7 +703,7 @@ def run_chat(model: str, question: str, image_urls: list[str],
         llm.llm_engine.add_lora(lora_request=lora_request)
 
     sampling_params = SamplingParams(temperature=0.0,
-                                     max_tokens=128,
+                                     max_tokens=256,
                                      stop_token_ids=req_data.stop_token_ids)
     outputs = llm.chat(
         [{
@@ -729,10 +738,12 @@ def main(args: Namespace):
     method = args.method
     seed = args.seed
 
+    image_urls = IMAGE_URLS[:args.num_images]
+
     if method == "generate":
-        run_generate(model, QUESTION, IMAGE_URLS, seed)
+        run_generate(model, QUESTION, image_urls, seed)
     elif method == "chat":
-        run_chat(model, QUESTION, IMAGE_URLS, seed)
+        run_chat(model, QUESTION, image_urls, seed)
     else:
         raise ValueError(f"Invalid method: {method}")
 
@@ -757,6 +768,13 @@ def main(args: Namespace):
         type=int,
         default=None,
         help="Set the seed when initializing `vllm.LLM`.")
+    parser.add_argument(
+        "--num-images",
+        "-n",
+        type=int,  # convert CLI input to int so the integer `choices` match
+        choices=list(range(1, 13)),  # 12 is the max number of images
+        default=2,
+        help="Number of images to use for the demo.")
 
     args = parser.parse_args()
     main(args)
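The new --num-images flag simply truncates IMAGE_URLS before main() dispatches to run_generate or run_chat. A minimal standalone sketch of that argument handling, with hypothetical placeholder URLs standing in for the twelve Wikimedia entries above:

import argparse

# Hypothetical stand-ins for the twelve Wikimedia URLs in the example script.
IMAGE_URLS = [f"https://example.com/image_{i}.jpg" for i in range(12)]

parser = argparse.ArgumentParser()
parser.add_argument(
    "--num-images",
    "-n",
    type=int,  # CLI input arrives as a string; convert before the choices check
    choices=list(range(1, len(IMAGE_URLS) + 1)),
    default=2,
    help="Number of images to use for the demo.")

args = parser.parse_args(["--num-images", "4"])
image_urls = IMAGE_URLS[:args.num_images]  # only the first N URLs feed the prompt
print(len(image_urls))  # -> 4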
4 changes: 3 additions & 1 deletion vllm/model_executor/models/mllama4.py
@@ -477,7 +477,9 @@ def get_hf_processor(self, **kwargs: object) -> Llama4Processor:
                                 **kwargs)
 
     def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
-        return {"image": 10}
+        # Although vLLM can support more images from an infra capability
+        # perspective, we do not recommend using >10 images in practice.
+        return {"image": None}
 
     @staticmethod
     def get_patch_per_chunk(vision_config: Llama4VisionConfig) -> int:
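With the hard-coded {"image": 10} limit removed, the effective per-prompt image cap is whatever the caller passes through limit_mm_per_prompt, as the example script does with {"image": len(image_urls)}. A minimal sketch of pinning the recommended practical cap at engine construction; the model id is an assumption, and the other values mirror the Llama 4 EngineArgs call above:

from vllm import LLM

llm = LLM(
    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",  # assumed model id
    max_model_len=131072,
    tensor_parallel_size=8,
    # The processor no longer enforces a cap of its own; keep <=10 images
    # per prompt, per the recommendation in the comment above.
    limit_mm_per_prompt={"image": 10},
)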