Skip to content

Commit

Permalink
Merge pull request EvolvingLMMs-Lab#95 from AtsuMiyai/new_task/upd
Browse files Browse the repository at this point in the history
add MM-UPD
  • Loading branch information
Luodian authored Jun 4, 2024
2 parents a7451a2 + cb2f2d1 commit 517603e
Show file tree
Hide file tree
Showing 17 changed files with 980 additions and 0 deletions.
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,19 @@ We also provide the raw data exported from Weights & Biases for the detailed res
- MMMU (mmmu)
  - MMMU Validation (mmmu_val)
  - MMMU Test (mmmu_test)
- MMUPD (mmupd)
  - MMUPD Base (mmupd_base)
    - MMAAD Base (mmaad_base)
    - MMIASD Base (mmiasd_base)
    - MMIVQD Base (mmivqd_base)
  - MMUPD Option (mmupd_option)
    - MMAAD Option (mmaad_option)
    - MMIASD Option (mmiasd_option)
    - MMIVQD Option (mmivqd_option)
  - MMUPD Instruction (mmupd_instruction)
    - MMAAD Instruction (mmaad_instruction)
    - MMIASD Instruction (mmiasd_instruction)
    - MMIVQD Instruction (mmivqd_instruction)
- MMVet (mmvet)
- Multi-DocVQA (multidocvqa)
  - Multi-DocVQA Validation (multidocvqa_val)
Expand Down
18 changes: 18 additions & 0 deletions lmms_eval/tasks/mmupd/_default_template_mmupd_yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Shared defaults for the MM-UPD task configs; each task file pulls this in
# through its `include:` key and adds its own task name, dataset_name,
# prompts and metric_list.
dataset_path: MM-UPD/MM-UPD

# Hooks implemented in the task folder's utils module.
doc_to_visual: !function utils.mmupd_doc_to_visual
doc_to_text: !function utils.mmupd_doc_to_text
# Keep exactly one doc_to_target entry: duplicate mapping keys are invalid
# YAML and most loaders silently keep only the last occurrence.
doc_to_target: "answer"
process_results: !function utils.mmupd_process_results

# Per-model overrides, keyed by model name.
model_specific_generation_kwargs:
  llava:
    image_aspect_ratio: original

output_type: generate_until
generation_kwargs:
  # Stop sequence(s) for generation.
  until:
    - "ASSISTANT:"
  max_new_tokens: 1024
  # Sampling is disabled: do_sample is false, temperature/top_p are 0 and a
  # single beam is used.
  temperature: 0
  top_p: 0
  num_beams: 1
  do_sample: false
12 changes: 12 additions & 0 deletions lmms_eval/tasks/mmupd/mmaad_base.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# MM-AAD task, "base" setting.
# Shared settings come from the MM-UPD default template.
include: _default_template_mmupd_yaml

task: mmaad_base
dataset_name: mmaad_base
test_split: test

model_specific_prompt_kwargs:
  default:
    # Empty prefix; the suffix is a single newline (no extra instruction).
    pre_prompt: ''
    post_prompt: "\n"

metric_list:
  - metric: gpt_eval_score
    # Aggregation hook lives in the task folder's utils module.
    aggregation: !function utils.mmaad_base
    higher_is_better: true
12 changes: 12 additions & 0 deletions lmms_eval/tasks/mmupd/mmaad_instruction.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# MM-AAD task, "instruction" setting.
# Shared settings come from the MM-UPD default template.
include: _default_template_mmupd_yaml

task: mmaad_instruction
# NOTE(review): points at the mmaad_base dataset config; this file differs
# from the base task in its post_prompt and aggregation hook - confirm the
# reuse of the base data is intended.
dataset_name: mmaad_base
test_split: test

model_specific_prompt_kwargs:
  default:
    # Empty prefix; the suffix carries the "none of the above" instruction.
    pre_prompt: ''
    post_prompt: "\nIf all the options are incorrect, answer \"F. None of the above\"."

metric_list:
  - metric: gpt_eval_score
    # Aggregation hook lives in the task folder's utils module.
    aggregation: !function utils.mmaad_instruction
    higher_is_better: true
12 changes: 12 additions & 0 deletions lmms_eval/tasks/mmupd/mmaad_option.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# MM-AAD task, "option" setting.
# Shared settings come from the MM-UPD default template.
include: _default_template_mmupd_yaml

task: mmaad_option
dataset_name: mmaad_option
test_split: test

model_specific_prompt_kwargs:
  default:
    # Empty prefix; the suffix asks for the option letter only.
    pre_prompt: ''
    post_prompt: "\nAnswer with the option's letter from the given choices directly."

metric_list:
  - metric: gpt_eval_score
    # Aggregation hook lives in the task folder's utils module.
    aggregation: !function utils.mmaad_option
    higher_is_better: true
12 changes: 12 additions & 0 deletions lmms_eval/tasks/mmupd/mmiasd_base.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# MM-IASD task, "base" setting.
# Shared settings come from the MM-UPD default template.
include: _default_template_mmupd_yaml

task: mmiasd_base
dataset_name: mmiasd_base
test_split: test

model_specific_prompt_kwargs:
  default:
    # Empty prefix; the suffix is a single newline (no extra instruction).
    pre_prompt: ''
    post_prompt: "\n"

metric_list:
  - metric: gpt_eval_score
    # Aggregation hook lives in the task folder's utils module.
    aggregation: !function utils.mmiasd_base
    higher_is_better: true
12 changes: 12 additions & 0 deletions lmms_eval/tasks/mmupd/mmiasd_instruction.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# MM-IASD task, "instruction" setting.
# Shared settings come from the MM-UPD default template.
include: _default_template_mmupd_yaml

task: mmiasd_instruction
# NOTE(review): points at the mmiasd_base dataset config; this file differs
# from the base task in its post_prompt and aggregation hook - confirm the
# reuse of the base data is intended.
dataset_name: mmiasd_base
test_split: test

model_specific_prompt_kwargs:
  default:
    # Empty prefix; the suffix carries the "none of the above" instruction.
    pre_prompt: ''
    post_prompt: "\nIf all the options are incorrect, answer \"F. None of the above\"."

metric_list:
  - metric: gpt_eval_score
    # Aggregation hook lives in the task folder's utils module.
    aggregation: !function utils.mmiasd_instruction
    higher_is_better: true
12 changes: 12 additions & 0 deletions lmms_eval/tasks/mmupd/mmiasd_option.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# MM-IASD task, "option" setting.
# Shared settings come from the MM-UPD default template.
include: _default_template_mmupd_yaml

task: mmiasd_option
dataset_name: mmiasd_option
test_split: test

model_specific_prompt_kwargs:
  default:
    # Empty prefix; the suffix asks for the option letter only.
    pre_prompt: ''
    post_prompt: "\nAnswer with the option's letter from the given choices directly."

metric_list:
  - metric: gpt_eval_score
    # Aggregation hook lives in the task folder's utils module.
    aggregation: !function utils.mmiasd_option
    higher_is_better: true
12 changes: 12 additions & 0 deletions lmms_eval/tasks/mmupd/mmivqd_base.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# MM-IVQD task, "base" setting.
# Shared settings come from the MM-UPD default template.
include: _default_template_mmupd_yaml

task: mmivqd_base
dataset_name: mmivqd_base
test_split: test

model_specific_prompt_kwargs:
  default:
    # Empty prefix; the suffix is a single newline (no extra instruction).
    pre_prompt: ''
    post_prompt: "\n"

metric_list:
  - metric: gpt_eval_score
    # Aggregation hook lives in the task folder's utils module.
    aggregation: !function utils.mmivqd_base
    higher_is_better: true
12 changes: 12 additions & 0 deletions lmms_eval/tasks/mmupd/mmivqd_instruction.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# MM-IVQD task, "instruction" setting.
# Shared settings come from the MM-UPD default template.
include: _default_template_mmupd_yaml

task: mmivqd_instruction
# NOTE(review): points at the mmivqd_base dataset config; this file differs
# from the base task in its post_prompt and aggregation hook - confirm the
# reuse of the base data is intended.
dataset_name: mmivqd_base
test_split: test

model_specific_prompt_kwargs:
  default:
    # Empty prefix; the suffix carries the "image and question are
    # irrelevant" instruction.
    pre_prompt: ''
    post_prompt: "\nIf the given image is irrelevant to the question, answer \"F. The image and question are irrelevant.\"."

metric_list:
  - metric: gpt_eval_score
    # Aggregation hook lives in the task folder's utils module.
    aggregation: !function utils.mmivqd_instruction
    higher_is_better: true
12 changes: 12 additions & 0 deletions lmms_eval/tasks/mmupd/mmivqd_option.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# MM-IVQD task, "option" setting.
# Shared settings come from the MM-UPD default template.
include: _default_template_mmupd_yaml

task: mmivqd_option
dataset_name: mmivqd_option
test_split: test

model_specific_prompt_kwargs:
  default:
    # Empty prefix; the suffix asks for the option letter only.
    pre_prompt: ''
    post_prompt: "\nAnswer with the option's letter from the given choices directly."

metric_list:
  - metric: gpt_eval_score
    # Aggregation hook lives in the task folder's utils module.
    aggregation: !function utils.mmivqd_option
    higher_is_better: true
15 changes: 15 additions & 0 deletions lmms_eval/tasks/mmupd/mmupd.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Group config: `mmupd` bundles all nine MM-UPD tasks
# (AAD / IASD / IVQD, each in base, option and instruction settings).
group: mmupd
task:
  # MM-AAD
  - mmaad_base
  - mmaad_option
  - mmaad_instruction
  # MM-IASD
  - mmiasd_base
  - mmiasd_option
  - mmiasd_instruction
  # MM-IVQD
  - mmivqd_base
  - mmivqd_option
  - mmivqd_instruction
metadata:
  version: 0.0
  # NOTE(review): presumably read by the GPT-based scoring code in utils -
  # confirm against the consumer.
  sys_prompt: ''
  gpt_eval_model_name: 'gpt-3.5-turbo-0613'
10 changes: 10 additions & 0 deletions lmms_eval/tasks/mmupd/mmupd_base.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Group config: `mmupd_base` bundles the three base-setting MM-UPD tasks.
group: mmupd_base
task:
  - mmaad_base
  - mmiasd_base
  - mmivqd_base
metadata:
  version: 0.0
  # NOTE(review): presumably read by the GPT-based scoring code in utils -
  # confirm against the consumer.
  sys_prompt: ''
  gpt_eval_model_name: 'gpt-3.5-turbo-0613'

Loading

0 comments on commit 517603e

Please sign in to comment.