LongVideoBench support: image LMMs (idefics2, phi3) and video LMMs (LLaVA-Next-Video-34B)
EvolvingLMMs-Lab · Jun 15, 2024 · 62ea8ce · 62ea8ce
1 parent ea14cd4
commit 62ea8ce
Show file tree

Hide file tree

Showing 4 changed files with 419 additions and 1 deletion.
diff --git a/lmms_eval/models/llava_vid.py b/lmms_eval/models/llava_vid.py
@@ -96,6 +96,7 @@ def __init__(
         self.mm_spatial_pool_out_channels = int(mm_spatial_pool_out_channels)
         self.mm_spatial_pool_mode = mm_spatial_pool_mode
         self.max_frames_num = int(max_frames_num)
+        print(self.max_frames_num)
         if self.overwrite == True:
             overwrite_config = {}
             overwrite_config["mm_resampler_type"] = self.mm_resampler_type
@@ -404,7 +405,7 @@ def generate_until(self, requests) -> List[str]:
                     attention_mask=attention_masks,
                     modalities="video",
                     use_cache=self.use_cache,
-                    stopping_criteria=[stopping_criteria],
+                    #stopping_criteria=[stopping_criteria],
                     do_sample=True if gen_kwargs["temperature"] > 0 else False,
                     temperature=gen_kwargs["temperature"],
                     top_p=gen_kwargs["top_p"],

diff --git a/lmms_eval/tasks/longvideobench/longvideobench_val_i.yaml b/lmms_eval/tasks/longvideobench/longvideobench_val_i.yaml
@@ -0,0 +1,29 @@
+dataset_path: longvideobench/LongVideoBench
+dataset_kwargs:
+  token: True
+  cache_dir: longvideobench
+  video: True
+  force_download: False
+  local_files_only: False
+  # From_YouTube: True
+task: longvideobench_val_i
+test_split: validation
+doc_to_visual: !function utils.longvideobench_doc_to_visual_i
+doc_to_text: !function utils.longvideobench_doc_to_text
+doc_to_target: "correct_choice"
+generation_kwargs:
+  max_new_tokens: 32
+  temperature: 0
+  do_sample: False
+process_results: !function utils.longvideobench_process_results
+metric_list:
+  - metric: lvb_acc
+    aggregation: !function utils.longvideobench_aggregate_results
+    higher_is_better: true
+
+model_specific_prompt_kwargs:
+  default:
+    pre_prompt: ""
+    post_prompt: "Answer with the option's letter from the given choices directly.\n"
+    insert_interleave_subtitles: True
+
diff --git a/lmms_eval/tasks/longvideobench/longvideobench_val_v.yaml b/lmms_eval/tasks/longvideobench/longvideobench_val_v.yaml
@@ -0,0 +1,28 @@
+dataset_path: longvideobench/LongVideoBench
+dataset_kwargs:
+  token: True
+  cache_dir: longvideobench
+  video: True
+  force_download: False
+  local_files_only: False
+  # From_YouTube: True
+task: longvideobench_val_v
+test_split: validation
+doc_to_visual: !function utils.longvideobench_doc_to_visual_v
+doc_to_text: !function utils.longvideobench_doc_to_text
+doc_to_target: "correct_choice"
+generation_kwargs:
+  max_new_tokens: 32
+  temperature: 0
+  do_sample: False
+process_results: !function utils.longvideobench_process_results
+metric_list:
+  - metric: lvb_acc
+    aggregation: !function utils.longvideobench_aggregate_results
+    higher_is_better: true
+
+model_specific_prompt_kwargs:
+  default:
+    pre_prompt: ""
+    post_prompt: "Answer with the option's letter from the given choices directly.\n"
+