-
Notifications
You must be signed in to change notification settings - Fork 174
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
jzhang2427 committed Oct 16, 2024 (1 parent: 8c2d89b; commit 2d466f7)
Showing 2 changed files with 140 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
import os | ||
from pathlib import Path | ||
|
||
import numpy as np | ||
import yaml | ||
from loguru import logger as eval_logger | ||
|
||
# Root of the local Hugging Face cache; the HF_HOME environment variable
# overrides the default location.
hf_home = os.getenv("HF_HOME", "~/.cache/huggingface/")
base_cache_dir = os.path.expanduser(hf_home)

# Re-read the task config that sits next to this module to learn the dataset's
# cache sub-directory. Lines carrying the custom `!function` tag are dropped
# before parsing (presumably because yaml.safe_load has no constructor for it).
with open(Path(__file__).parent / "vinoground.yaml", "r") as f:
    raw_data = f.readlines()
safe_data = [line for line in raw_data if "!function" not in line]
cache_name = yaml.safe_load("".join(safe_data))["dataset_kwargs"]["cache_dir"]
|
||
|
||
def vinoground_doc_to_visual(doc):
    """Return the on-disk video for a document as a one-element list.

    ``doc["index"]`` is split on ``"_"``. When the third field is ``"text"``
    the video is ``vinoground_videos/<field0>_<field1>.mp4``; otherwise it is
    ``vinoground_videos_concated/<field0>.mp4``. Both folders are resolved
    under ``<base_cache_dir>/<cache_name>``.

    Raises:
        Exception: if the resolved file does not exist.
    """
    index_fields = doc["index"].split("_")
    dataset_root = os.path.join(base_cache_dir, cache_name)

    if index_fields[2] == "text":
        folder, stem = "vinoground_videos", "_".join(index_fields[:2])
    else:
        folder, stem = "vinoground_videos_concated", index_fields[0]

    video_path = os.path.join(dataset_root, folder, stem + ".mp4")
    if os.path.exists(video_path):
        return [video_path]
    raise Exception(f"video path:{video_path} does not exist, please check")
|
||
|
||
def vinoground_doc_to_text(doc, lmms_eval_specific_kwargs=None):
    """Build the two-choice prompt for a document.

    The third ``"_"``-separated field of ``doc["index"]`` selects the prompt:

    * ``"text"``: ask which of ``doc["pos_cap"]`` (option A) and
      ``doc["neg_cap"]`` (option B) describes the video.
    * anything else: ask which video segment matches the caption stored under
      ``doc["<field1>_cap"]``, where ``<field1>`` is the second index field.

    ``lmms_eval_specific_kwargs`` is accepted but not read; every prompt
    string is fixed in this function.
    """
    answer_instruction = "Answer with the option's letter from the given choices directly. Please only output 1 English character."
    index_fields = doc["index"].split("_")

    if index_fields[2] == "text":
        prompt_lines = [
            "Which caption best describes this video?",
            "A. " + doc["pos_cap"],
            "B. " + doc["neg_cap"],
            answer_instruction,
        ]
    else:
        polarity = index_fields[1]
        caption_in_question = doc[f"{polarity}_cap"]
        prompt_lines = [
            "Which video segment matches this caption? Note: The video contains two segments separated by a 2-second black frame.",
            f"Caption: {caption_in_question}",
            "A. The first fragment (before black frame)\nB. The second fragment (after black frame)",
            answer_instruction,
        ]

    return "\n".join(prompt_lines)
|
||
|
||
def vinoground_process_results(doc, results):
    """Package one prediction with the metadata the aggregator needs.

    Args:
        doc: dataset row providing ``"index"``, ``"major"`` and ``"minor"``;
            ``"minor"`` is either ``None`` or a ``";"``-separated string.
        results: model outputs; only the first element is used.

    Returns:
        ``{"vinoground_score": {...}}`` holding the index, the category list
        (major first, then each minor), the question type (third
        ``"_"``-separated field of the index) and the raw prediction.
    """
    pred = results[0]

    categories = [doc["major"]]
    minor_field = doc["minor"]
    if minor_field is not None:
        categories += minor_field.split(";")

    index = doc["index"]
    return {
        "vinoground_score": {
            "index": index,
            "categories": categories,
            "question_type": index.split("_")[2],
            "pred": pred,
        }
    }
|
||
|
||
def vinoground_aggregate_results(results):
    """Aggregate per-question predictions into text, video and group scores.

    Each element of ``results`` is a dict with ``"index"``, ``"categories"``,
    ``"question_type"`` and ``"pred"``. The leading ``"_"``-separated field of
    the index is the integer item id. An item has up to four questions (text
    and video, each with a "pos" and a "neg" variant), and a question is
    correct when the first character of ``pred`` matches the expected letter
    ("A" for indices containing "pos", "B" otherwise), case-insensitively.

    Per item:
      * text score  = both text questions correct
      * video score = both video questions correct
      * group score = text score and video score

    A per-category breakdown (each item also counts towards "all") is logged.

    Compared with the earlier implementation this version:
      * does not mutate the ``"categories"`` lists of the incoming results;
      * accumulates counts as Python ints (int8 scalars overflow past 127
        under NumPy 2 promotion rules);
      * averages over the item ids actually present instead of assuming ids
        0..499, so partial runs no longer raise ``KeyError``; a complete
        0..499 run yields the same numbers as before;
      * scores an empty prediction as wrong instead of raising ``IndexError``.

    Returns:
        ``(text_score, video_score, group_score)`` as percentages; all zeros
        when ``results`` is empty.
    """
    entries = [(int(result["index"].split("_")[0]), result) for result in results]
    if not entries:
        eval_logger.warning("vinoground: no results to aggregate")
        return 0.0, 0.0, 0.0

    # Columns: 0 text/pos, 1 text/neg, 2 text score,
    #          3 video/pos, 4 video/neg, 5 video score, 6 group score.
    num_rows = max(idx for idx, _ in entries) + 1
    matrix = np.zeros((num_rows, 7), dtype=np.int8)
    index_to_categories = {}

    for idx, result in entries:
        is_pos = "pos" in result["index"]
        matrix_col = 0 if is_pos else 1
        if result["question_type"] == "video":
            matrix_col += 3
        gt = "a" if is_pos else "b"
        # Slicing (not indexing) keeps an empty prediction from raising.
        matrix[idx, matrix_col] = result["pred"][:1].lower() == gt

        if idx not in index_to_categories:
            # Copy so the caller's list is left untouched.
            index_to_categories[idx] = [*result["categories"], "all"]

    matrix[:, 2] = matrix[:, 0] & matrix[:, 1]
    matrix[:, 5] = matrix[:, 3] & matrix[:, 4]
    matrix[:, 6] = matrix[:, 2] & matrix[:, 5]

    seen = sorted(index_to_categories)

    # category -> [item count, text hits, video hits, group hits]
    tallies = {}
    for idx in seen:
        text_ok = int(matrix[idx, 2])
        video_ok = int(matrix[idx, 5])
        group_ok = int(matrix[idx, 6])
        for category in index_to_categories[idx]:
            tally = tallies.setdefault(category, [0, 0, 0, 0])
            tally[0] += 1
            tally[1] += text_ok
            tally[2] += video_ok
            tally[3] += group_ok

    log_lines = ["Categorical results:\n"]
    for category, (total, text_hits, video_hits, group_hits) in tallies.items():
        log_lines.append(
            f"{category}: text: {text_hits / total * 100:.2f}%, video: {video_hits / total * 100:.2f}%, group: {group_hits / total * 100:.2f}%\n"
        )
    eval_logger.info("".join(log_lines))

    scored = matrix[seen]
    return scored[:, 2].mean() * 100, scored[:, 5].mean() * 100, scored[:, 6].mean() * 100
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Task configuration for the Vinoground benchmark.
# NOTE: the accompanying Python helpers re-parse this file with yaml.safe_load
# after dropping every line that contains "!function".
dataset_path: HanSolo9682/Vinoground
dataset_kwargs:
  token: True
  # Sub-directory of the Hugging Face cache (HF_HOME) in which the helpers
  # look for the "vinoground_videos" and "vinoground_videos_concated" folders.
  cache_dir: vinoground
  video: True

task: vinoground
test_split: lmmseval
output_type: generate_until
doc_to_visual: !function utils.vinoground_doc_to_visual
doc_to_text: !function utils.vinoground_doc_to_text
# Presumably the name of the dataset column holding the expected letter — TODO confirm.
doc_to_target: "answer"
generation_kwargs:
  max_new_tokens: 16
  temperature: 0
  top_p: 1.0
  num_beams: 1
  do_sample: false

process_results: !function utils.vinoground_process_results

metric_list:
  - metric: vinoground_score
    aggregation: !function utils.vinoground_aggregate_results
    higher_is_better: true
# NOTE(review): vinoground_doc_to_text accepts lmms_eval_specific_kwargs but
# builds its prompts from hard-coded strings, so these values appear to be
# unused. The post_prompt wording below ("one") also differs from the
# hard-coded one ("1").
lmms_eval_specific_kwargs:
  default:
    pre_prompt: ""
    post_prompt: "\nAnswer with the option's letter from the given choices directly. Please only output one English character."
metadata:
  - version: 0.0