@@ -326,6 +326,45 @@ def load_llama4(question: str, image_urls: list[str]) -> ModelRequestData:
326326 )
327327
328328
def load_kimi_vl(question: str, image_urls: list[str]) -> ModelRequestData:
    """Prepare a multi-image request for Kimi-VL-A3B-Instruct.

    Builds the vLLM engine arguments, renders the chat prompt through the
    model's own processor (trust_remote_code is required for this repo),
    and fetches every image referenced by *image_urls*.

    Args:
        question: The text question to ask about the images.
        image_urls: URLs of the images to include in the prompt.

    Returns:
        A ModelRequestData bundling engine args, the rendered prompt,
        and the downloaded image data.
    """
    model_name = "moonshotai/Kimi-VL-A3B-Instruct"

    engine_args = EngineArgs(
        model=model_name,
        max_model_len=4096,
        max_num_seqs=4,
        tensor_parallel_size=1,
        # Allow as many image inputs per prompt as URLs were supplied.
        limit_mm_per_prompt={"image": len(image_urls)},
        trust_remote_code=True,
    )

    # One image placeholder per URL, followed by the text question.
    content = [{"type": "image", "image": url} for url in image_urls]
    content.append({"type": "text", "text": question})
    messages = [{"role": "user", "content": content}]

    processor = AutoProcessor.from_pretrained(model_name,
                                              trust_remote_code=True)
    prompt = processor.apply_chat_template(messages,
                                           tokenize=False,
                                           add_generation_prompt=True)

    return ModelRequestData(
        engine_args=engine_args,
        prompt=prompt,
        image_data=[fetch_image(url) for url in image_urls],
    )
367+
329368def load_mistral3 (question : str , image_urls : list [str ]) -> ModelRequestData :
330369 model_name = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
331370
@@ -640,6 +679,7 @@ def load_qwen2_5_vl(question: str, image_urls: list[str]) -> ModelRequestData:
640679 "h2ovl_chat" : load_h2ovl ,
641680 "idefics3" : load_idefics3 ,
642681 "internvl_chat" : load_internvl ,
682+ "kimi_vl" : load_kimi_vl ,
643683 "llama4" : load_llama4 ,
644684 "mistral3" : load_mistral3 ,
645685 "mllama" : load_mllama ,
0 commit comments