Commit 9bab677

Merge branch 'EvolvingLMMs-Lab:main' into add-tinyllava

zjysteven authored Jun 16, 2024
2 parents dbfb238 + 74facb4 commit 9bab677
Showing 11 changed files with 791 additions and 6 deletions.
43 changes: 43 additions & 0 deletions lmms_eval/api/task.py
@@ -778,6 +778,7 @@ def _download_from_youtube(path):
 force_unzip = dataset_kwargs.get("force_unzip", False)
 cache_path = snapshot_download(repo_id=self.DATASET_PATH, repo_type="dataset", force_download=force_download, etag_timeout=60)
 zip_files = glob(os.path.join(cache_path, "**/*.zip"), recursive=True)
+tar_files = glob(os.path.join(cache_path, "**/*.tar*"), recursive=True)
 
 def unzip_video_data(zip_file):
     import zipfile
@@ -786,10 +787,52 @@ def unzip_video_data(zip_file):
         zip_ref.extractall(cache_dir)
         eval_logger.info(f"Extracted all files from {zip_file} to {cache_dir}")
 
+def untar_video_data(tar_file):
+    import tarfile
+    with tarfile.open(tar_file, "r") as tar_ref:
+        tar_ref.extractall(cache_dir)
+        eval_logger.info(f"Extracted all files from {tar_file} to {cache_dir}")
+
+
+
+def concat_tar_parts(tar_parts, output_tar):
+    with open(output_tar, 'wb') as out_tar:
+        from tqdm import tqdm
+        for part in tqdm(sorted(tar_parts)):
+            with open(part, 'rb') as part_file:
+                out_tar.write(part_file.read())
+    eval_logger.info(f"Concatenated parts {tar_parts} into {output_tar}")
+
 # Unzip zip files if needed
 if force_unzip or (not os.path.exists(cache_dir) and len(zip_files) > 0):
     for zip_file in zip_files:
         unzip_video_data(zip_file)
 
+# Concatenate and extract tar files if needed
+if force_unzip or (not os.path.exists(cache_dir) and len(tar_files) > 0):
+    tar_parts_dict = {}
+
+    # Group tar parts together
+    for tar_file in tar_files:
+        base_name = tar_file.split('.tar')[0]
+        if base_name not in tar_parts_dict:
+            tar_parts_dict[base_name] = []
+        tar_parts_dict[base_name].append(tar_file)
+
+
+    # Concatenate and untar split parts
+    for base_name, parts in tar_parts_dict.items():
+        eval_logger.info(f"Extracting following tar files: {parts}")
+        output_tar = base_name + ".tar"
+        if not os.path.exists(output_tar):
+            eval_logger.info(f"Start concatenating tar files")
+
+            concat_tar_parts(parts, output_tar)
+            eval_logger.info(f"Finish concatenating tar files")
+
+        if not os.path.exists(os.path.join(cache_dir, os.path.basename(base_name))):
+            untar_video_data(output_tar)
+
 accelerator.wait_for_everyone()
 dataset_kwargs.pop("cache_dir")
+dataset_kwargs.pop("video")
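The tar handling added above byte-concatenates split archive parts back into a single .tar before extracting it. A minimal standalone sketch of the same flow, using hypothetical part files (videos.tar.part0, videos.tar.part1) and a hypothetical cache directory:

import tarfile
from collections import defaultdict

cache_dir = "extracted"  # hypothetical target directory
tar_files = ["videos.tar.part1", "videos.tar.part0"]  # hypothetical split parts

# Group parts by base name, mirroring tar_file.split('.tar')[0] in the diff.
tar_parts_dict = defaultdict(list)
for tar_file in tar_files:
    tar_parts_dict[tar_file.split(".tar")[0]].append(tar_file)

for base_name, parts in tar_parts_dict.items():
    output_tar = base_name + ".tar"  # e.g. "videos.tar"
    # Concatenating the sorted parts restores the archive's original bytes.
    with open(output_tar, "wb") as out_tar:
        for part in sorted(parts):
            with open(part, "rb") as part_file:
                out_tar.write(part_file.read())
    with tarfile.open(output_tar, "r") as tar_ref:
        tar_ref.extractall(cache_dir)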
3 changes: 2 additions & 1 deletion lmms_eval/models/llava_vid.py
@@ -96,6 +96,7 @@ def __init__(
 self.mm_spatial_pool_out_channels = int(mm_spatial_pool_out_channels)
 self.mm_spatial_pool_mode = mm_spatial_pool_mode
 self.max_frames_num = int(max_frames_num)
+print(self.max_frames_num)
 if self.overwrite == True:
     overwrite_config = {}
     overwrite_config["mm_resampler_type"] = self.mm_resampler_type
@@ -416,4 +417,4 @@ def generate_until(self, requests) -> List[str]:
     outputs = self.tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0].strip()
     res.append(outputs)
     pbar.update(1)
-return res
\ No newline at end of file
+return res
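Aside from the trailing-newline fix, the only change in this file is a debug print of the frame budget. For orientation, max_frames_num arrives as a constructor argument alongside the spatial-pooling knobs in the first hunk; a hypothetical instantiation (the pretrained argument and checkpoint name are assumptions, not shown in this diff):

model = LlavaVid(
    pretrained="lmms-lab/LLaVA-NeXT-Video-7B",  # assumed checkpoint id
    max_frames_num=32,               # cast to int and printed in __init__
    mm_spatial_pool_mode="average",  # assumed pooling mode value
)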
16 changes: 11 additions & 5 deletions lmms_eval/models/phi3v.py
@@ -185,9 +185,16 @@ def _collate(x):
 contexts = list(contexts)
 for i in range(len(contexts)):
     if "<image>" in contexts[i]:
-        query = contexts[i].replace("<image>", "<|image_1|>")
+        query = "" + contexts[i]
+        img_placeholder_count = 1
+        while "<image>" in query:
+            query = query.replace("<image>", f"<|image_{img_placeholder_count}|>", 1)
+            img_placeholder_count += 1
     else:
-        query = f"<|image_1|>\n{contexts[i]}"
+        query = ""
+        for placeholder_id in range(len(visuals)):
+            query += f"<|image_{placeholder_id+1}|>\n"
+        query += contexts[i]
 messages = [
     {"role": "user", "content": query}
 ]
@@ -196,12 +203,11 @@ def _collate(x):
     tokenize=False,
     add_generation_prompt=True)
 assert len(contexts) == 1
-# We always pass a single image given that the model only accepts one image (as of 5/21/24).
+#
 context = contexts[0]
-pil_image = visuals[0]
 input_ids = self._processor(
     text=context,
-    images=[pil_image],
+    images=visuals,
     return_tensors="pt").to(self._device, self.model.dtype)
 # Setting default parameters.
 if "max_new_tokens" not in gen_kwargs:
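For reference, the rewritten prompt construction numbers Phi-3-Vision placeholders sequentially: each literal "<image>" marker becomes <|image_k|>, and prompts without inline markers get one placeholder per visual prepended. A self-contained sketch with a hypothetical two-image prompt:

context = "Compare <image> and <image>. Which is brighter?"  # hypothetical prompt
visuals = ["left.png", "right.png"]  # stand-ins for two PIL images

if "<image>" in context:
    query = context
    img_placeholder_count = 1
    while "<image>" in query:
        # Replace one occurrence at a time so the numbering stays sequential.
        query = query.replace("<image>", f"<|image_{img_placeholder_count}|>", 1)
        img_placeholder_count += 1
else:
    # No inline markers: prepend one numbered placeholder per image.
    query = "".join(f"<|image_{i + 1}|>\n" for i in range(len(visuals))) + context

print(query)  # -> Compare <|image_1|> and <|image_2|>. Which is brighter?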
29 changes: 29 additions & 0 deletions lmms_eval/tasks/longvideobench/longvideobench_val_i.yaml
@@ -0,0 +1,29 @@
+dataset_path: longvideobench/LongVideoBench
+dataset_kwargs:
+  token: True
+  cache_dir: longvideobench
+  video: True
+  force_download: False
+  local_files_only: False
+  # From_YouTube: True
+task: longvideobench_val_i
+test_split: validation
+doc_to_visual: !function utils.longvideobench_doc_to_visual_i
+doc_to_text: !function utils.longvideobench_doc_to_text
+doc_to_target: "correct_choice"
+generation_kwargs:
+  max_new_tokens: 32
+  temperature: 0
+  do_sample: False
+process_results: !function utils.longvideobench_process_results
+metric_list:
+  - metric: lvb_acc
+    aggregation: !function utils.longvideobench_aggregate_results
+    higher_is_better: true
+
+model_specific_prompt_kwargs:
+  default:
+    pre_prompt: ""
+    post_prompt: "Answer with the option's letter from the given choices directly.\n"
+    insert_interleave_subtitles: True

28 changes: 28 additions & 0 deletions lmms_eval/tasks/longvideobench/longvideobench_val_v.yaml
@@ -0,0 +1,28 @@
+dataset_path: longvideobench/LongVideoBench
+dataset_kwargs:
+  token: True
+  cache_dir: longvideobench
+  video: True
+  force_download: False
+  local_files_only: False
+  # From_YouTube: True
+task: longvideobench_val_v
+test_split: validation
+doc_to_visual: !function utils.longvideobench_doc_to_visual_v
+doc_to_text: !function utils.longvideobench_doc_to_text
+doc_to_target: "correct_choice"
+generation_kwargs:
+  max_new_tokens: 32
+  temperature: 0
+  do_sample: False
+process_results: !function utils.longvideobench_process_results
+metric_list:
+  - metric: lvb_acc
+    aggregation: !function utils.longvideobench_aggregate_results
+    higher_is_better: true
+
+model_specific_prompt_kwargs:
+  default:
+    pre_prompt: ""
+    post_prompt: "Answer with the option's letter from the given choices directly.\n"

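Both configs register their task names with lmms-eval, so after this merge the tasks should be selectable directly. A hypothetical invocation for the video variant (flags follow the lmms-eval README; the model choice is illustrative):

python3 -m lmms_eval --model llava_vid --tasks longvideobench_val_v --batch_size 1 --log_samples --output_path ./logs/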