Commit

add upd
AtsuMiyai committed May 29, 2024
1 parent 24dc435 commit 71401ba
Showing 25 changed files with 1,119 additions and 0 deletions.
13 changes: 13 additions & 0 deletions README.md
@@ -190,6 +190,19 @@ We also provide the raw data exported from Weights & Biases for the detailed results
- MMMU (mmmu)
- MMMU Validation (mmmu_val)
- MMMU Test (mmmu_test)
- MMUPDBench (mmupdbench)
- MMUPDBench Base (mmupdbench_base)
- MMAADBench Base (mmaadbench_base)
- MMIASDBench Base (mmiasdbench_base)
- MMIVQDBench Base (mmivqdbench_base)
- MMUPDBench Option (mmupdbench_option)
- MMAADBench Option (mmaadbench_option)
- MMIASDBench Option (mmiasdbench_option)
- MMIVQDBench Option (mmivqdbench_option)
- MMUPDBench Instruction (mmupdbench_instruction)
- MMAADBench Instruction (mmaadbench_instruction)
- MMIASDBench Instruction (mmiasdbench_instruction)
- MMIVQDBench Instruction (mmivqdbench_instruction)
- MMVet (mmvet)
- Multi-DocVQA (multidocvqa)
- Multi-DocVQA Validation (multidocvqa_val)
23 changes: 23 additions & 0 deletions lmms_eval/tasks/mmupdbench/_default_template_mmaadbench_base_yaml
@@ -0,0 +1,23 @@
dataset_path: MM-UPD/MM-UPD
doc_to_target: "answer"
model_specific_prompt_kwargs:
default:
pre_prompt: ""
post_prompt: "\n"
doc_to_visual: !function utils.mmupdbench_doc_to_visual
doc_to_text: !function utils.mmupdbench_doc_to_text
doc_to_target: "answer"
process_results: !function utils.mmupdbench_process_results
model_specific_generation_kwargs:
llava:
image_aspect_ratio: original
output_type: generate_until
dataset_name: mmaad_base
generation_kwargs:
until:
- "ASSISTANT:"
max_new_tokens: 1024
temperature: 0
top_p: 0
num_beams: 1
do_sample: false
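
This shared template wires the MM-UPD Hugging Face dataset to prompt-building and scoring helpers in the task's utils.py through the `!function` tags. A minimal sketch of what the two prompt helpers referenced above might look like, assuming the dataset rows expose a `question` field, per-letter option columns, and a PIL `image` field (the commit's actual utils.py is not reproduced on this page):

# Hypothetical sketch of the helpers referenced by the !function tags above.
# Field names ("question", "A"-"E", "image") and the kwargs convention are
# assumptions; the utils.py added in this commit is not shown here.
OPTION_LETTERS = ["A", "B", "C", "D", "E"]

def mmupdbench_doc_to_visual(doc):
    # lmms-eval expects a list of images per document.
    return [doc["image"].convert("RGB")]

def mmupdbench_doc_to_text(doc, model_specific_prompt_kwargs=None):
    kwargs = model_specific_prompt_kwargs or {}
    pre_prompt = kwargs.get("pre_prompt", "")
    post_prompt = kwargs.get("post_prompt", "")
    # Keep only the options that are actually present for this question.
    options = [f"{letter}. {doc[letter]}" for letter in OPTION_LETTERS
               if doc.get(letter) not in (None, "", "nan")]
    question = "\n".join([doc["question"], *options])
    return f"{pre_prompt}{question}{post_prompt}"

The base, option, and instruction templates below differ only in post_prompt and dataset_name, which is why they can all share the same helpers.
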
23 changes: 23 additions & 0 deletions lmms_eval/tasks/mmupdbench/_default_template_mmaadbench_instruction_yaml
@@ -0,0 +1,23 @@
dataset_path: MM-UPD/MM-UPD
doc_to_target: "answer"
model_specific_prompt_kwargs:
default:
pre_prompt: ""
post_prompt: "\nIf all the options are incorrect, answer \"F. None of the above\"."
doc_to_visual: !function utils.mmupdbench_doc_to_visual
doc_to_text: !function utils.mmupdbench_doc_to_text
doc_to_target: "answer"
process_results: !function utils.mmupdbench_process_results
model_specific_generation_kwargs:
llava:
image_aspect_ratio: original
output_type: generate_until
dataset_name: mmaad_base
generation_kwargs:
until:
- "ASSISTANT:"
max_new_tokens: 1024
temperature: 0
top_p: 0
num_beams: 1
do_sample: false
23 changes: 23 additions & 0 deletions lmms_eval/tasks/mmupdbench/_default_template_mmaadbench_option_yaml
@@ -0,0 +1,23 @@
dataset_path: MM-UPD/MM-UPD
doc_to_target: "answer"
model_specific_prompt_kwargs:
default:
pre_prompt: ""
post_prompt: "\nAnswer with the option's letter from the given choices directly."
doc_to_visual: !function utils.mmupdbench_doc_to_visual
doc_to_text: !function utils.mmupdbench_doc_to_text
doc_to_target: "answer"
process_results: !function utils.mmupdbench_process_results
model_specific_generation_kwargs:
llava:
image_aspect_ratio: original
output_type: generate_until
dataset_name: mmaad_option
generation_kwargs:
until:
- "ASSISTANT:"
max_new_tokens: 1024
temperature: 0
top_p: 0
num_beams: 1
do_sample: false
23 changes: 23 additions & 0 deletions lmms_eval/tasks/mmupdbench/_default_template_mmiasdbench_base_yaml
@@ -0,0 +1,23 @@
dataset_path: MM-UPD/MM-UPD
doc_to_target: "answer"
model_specific_prompt_kwargs:
default:
pre_prompt: ""
post_prompt: "\n"
doc_to_visual: !function utils.mmupdbench_doc_to_visual
doc_to_text: !function utils.mmupdbench_doc_to_text
doc_to_target: "answer"
process_results: !function utils.mmupdbench_process_results
model_specific_generation_kwargs:
llava:
image_aspect_ratio: original
output_type: generate_until
dataset_name: mmiasd_base
generation_kwargs:
until:
- "ASSISTANT:"
max_new_tokens: 1024
temperature: 0
top_p: 0
num_beams: 1
do_sample: false
23 changes: 23 additions & 0 deletions lmms_eval/tasks/mmupdbench/_default_template_mmiasdbench_instruction_yaml
@@ -0,0 +1,23 @@
dataset_path: MM-UPD/MM-UPD
doc_to_target: "answer"
model_specific_prompt_kwargs:
default:
pre_prompt: ""
post_prompt: "\nIf all the options are incorrect, answer \"F. None of the above\"."
doc_to_visual: !function utils.mmupdbench_doc_to_visual
doc_to_text: !function utils.mmupdbench_doc_to_text
doc_to_target: "answer"
process_results: !function utils.mmupdbench_process_results
model_specific_generation_kwargs:
llava:
image_aspect_ratio: original
output_type: generate_until
dataset_name: mmiasd_base
generation_kwargs:
until:
- "ASSISTANT:"
max_new_tokens: 1024
temperature: 0
top_p: 0
num_beams: 1
do_sample: false
23 changes: 23 additions & 0 deletions lmms_eval/tasks/mmupdbench/_default_template_mmiasdbench_option_yaml
@@ -0,0 +1,23 @@
dataset_path: MM-UPD/MM-UPD
doc_to_target: "answer"
model_specific_prompt_kwargs:
default:
pre_prompt: ""
post_prompt: "\nAnswer with the option's letter from the given choices directly."
doc_to_visual: !function utils.mmupdbench_doc_to_visual
doc_to_text: !function utils.mmupdbench_doc_to_text
doc_to_target: "answer"
process_results: !function utils.mmupdbench_process_results
model_specific_generation_kwargs:
llava:
image_aspect_ratio: original
output_type: generate_until
dataset_name: mmiasd_option
generation_kwargs:
until:
- "ASSISTANT:"
max_new_tokens: 1024
temperature: 0
top_p: 0
num_beams: 1
do_sample: false
23 changes: 23 additions & 0 deletions lmms_eval/tasks/mmupdbench/_default_template_mmivqdbench_base_yaml
@@ -0,0 +1,23 @@
dataset_path: MM-UPD/MM-UPD
doc_to_target: "answer"
model_specific_prompt_kwargs:
default:
pre_prompt: ""
post_prompt: "\n"
doc_to_visual: !function utils.mmupdbench_doc_to_visual
doc_to_text: !function utils.mmupdbench_doc_to_text
doc_to_target: "answer"
process_results: !function utils.mmupdbench_process_results
model_specific_generation_kwargs:
llava:
image_aspect_ratio: original
output_type: generate_until
dataset_name: mmivqd_base
generation_kwargs:
until:
- "ASSISTANT:"
max_new_tokens: 1024
temperature: 0
top_p: 0
num_beams: 1
do_sample: false
23 changes: 23 additions & 0 deletions lmms_eval/tasks/mmupdbench/_default_template_mmivqdbench_instruction_yaml
@@ -0,0 +1,23 @@
dataset_path: MM-UPD/MM-UPD
doc_to_target: "answer"
model_specific_prompt_kwargs:
default:
pre_prompt: ""
post_prompt: "\nIf the given image is irrelevant to the question, answer \"F. The image and question are irrelevant.\"."
doc_to_visual: !function utils.mmupdbench_doc_to_visual
doc_to_text: !function utils.mmupdbench_doc_to_text
doc_to_target: "answer"
process_results: !function utils.mmupdbench_process_results
model_specific_generation_kwargs:
llava:
image_aspect_ratio: original
output_type: generate_until
dataset_name: mmivqd_base
generation_kwargs:
until:
- "ASSISTANT:"
max_new_tokens: 1024
temperature: 0
top_p: 0
num_beams: 1
do_sample: false
23 changes: 23 additions & 0 deletions lmms_eval/tasks/mmupdbench/_default_template_mmivqdbench_option_yaml
@@ -0,0 +1,23 @@
dataset_path: MM-UPD/MM-UPD
doc_to_target: "answer"
model_specific_prompt_kwargs:
default:
pre_prompt: ""
post_prompt: "\nAnswer with the option's letter from the given choices directly."
doc_to_visual: !function utils.mmupdbench_doc_to_visual
doc_to_text: !function utils.mmupdbench_doc_to_text
doc_to_target: "answer"
process_results: !function utils.mmupdbench_process_results
model_specific_generation_kwargs:
llava:
image_aspect_ratio: original
output_type: generate_until
dataset_name: mmivqd_option
generation_kwargs:
until:
- "ASSISTANT:"
max_new_tokens: 1024
temperature: 0
top_p: 0
num_beams: 1
do_sample: false
7 changes: 7 additions & 0 deletions lmms_eval/tasks/mmupdbench/mmaadbench_base.yaml
@@ -0,0 +1,7 @@
task: "mmaadbench_base"
test_split: test
include: _default_template_mmaadbench_base_yaml
metric_list:
- metric: gpt_eval_score
aggregation: !function utils.mmaadbench_base
higher_is_better: true
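
Each per-task file like this one pairs a shared template with a single gpt_eval_score metric whose aggregation function lives in utils.py. A hedged sketch of how process_results (from the templates) and a per-task aggregation such as utils.mmaadbench_base could fit together; the dict layout and the judge_with_gpt helper are assumptions rather than the commit's actual code:

# Hypothetical plumbing: lmms-eval calls process_results once per document,
# then hands the collected values to the aggregation named in metric_list.
def mmupdbench_process_results(doc, results):
    prediction = results[0].strip()  # first (and only) generation for this doc
    return {
        "gpt_eval_score": {
            "index": doc.get("index"),
            "question": doc.get("question"),
            "answer": doc.get("answer"),
            "prediction": prediction,
        }
    }

def mmaadbench_base(results):
    # `results` is the list of dicts emitted above for the whole split.
    # judge_with_gpt is a hypothetical helper (sketched further below) that
    # asks the GPT judge whether a prediction matches the ground truth.
    verdicts = [judge_with_gpt(r["question"], r["answer"], r["prediction"])
                for r in results]
    return 100.0 * sum(verdicts) / max(len(verdicts), 1)
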
7 changes: 7 additions & 0 deletions lmms_eval/tasks/mmupdbench/mmaadbench_instruction.yaml
@@ -0,0 +1,7 @@
task: "mmaadbench_instruction"
test_split: test
include: _default_template_mmaadbench_instruction_yaml
metric_list:
- metric: gpt_eval_score
aggregation: !function utils.mmaadbench_instruction
higher_is_better: true
7 changes: 7 additions & 0 deletions lmms_eval/tasks/mmupdbench/mmaadbench_option.yaml
@@ -0,0 +1,7 @@
task: "mmaadbench_option"
test_split: test
include: _default_template_mmaadbench_option_yaml
metric_list:
- metric: gpt_eval_score
aggregation: !function utils.mmaadbench_option
higher_is_better: true
7 changes: 7 additions & 0 deletions lmms_eval/tasks/mmupdbench/mmiasdbench_base.yaml
@@ -0,0 +1,7 @@
task: "mmiasdbench_base"
test_split: test
include: _default_template_mmiasdbench_base_yaml
metric_list:
- metric: gpt_eval_score
aggregation: !function utils.mmiasdbench_base
higher_is_better: true
7 changes: 7 additions & 0 deletions lmms_eval/tasks/mmupdbench/mmiasdbench_instruction.yaml
@@ -0,0 +1,7 @@
task: "mmiasdbench_instruction"
test_split: test
include: _default_template_mmiasdbench_instruction_yaml
metric_list:
- metric: gpt_eval_score
aggregation: !function utils.mmiasdbench_instruction
higher_is_better: true
7 changes: 7 additions & 0 deletions lmms_eval/tasks/mmupdbench/mmiasdbench_option.yaml
@@ -0,0 +1,7 @@
task: "mmiasdbench_option"
test_split: test
include: _default_template_mmiasdbench_option_yaml
metric_list:
- metric: gpt_eval_score
aggregation: !function utils.mmiasdbench_option
higher_is_better: true
7 changes: 7 additions & 0 deletions lmms_eval/tasks/mmupdbench/mmivqdbench_base.yaml
@@ -0,0 +1,7 @@
task: "mmivqdbench_base"
test_split: test
include: _default_template_mmivqdbench_base_yaml
metric_list:
- metric: gpt_eval_score
aggregation: !function utils.mmivqdbench_base
higher_is_better: true
7 changes: 7 additions & 0 deletions lmms_eval/tasks/mmupdbench/mmivqdbench_instruction.yaml
@@ -0,0 +1,7 @@
task: "mmivqdbench_instruction"
test_split: test
include: _default_template_mmivqdbench_instruction_yaml
metric_list:
- metric: gpt_eval_score
aggregation: !function utils.mmivqdbench_instruction
higher_is_better: true
7 changes: 7 additions & 0 deletions lmms_eval/tasks/mmupdbench/mmivqdbench_option.yaml
@@ -0,0 +1,7 @@
task: "mmivqdbench_option"
test_split: test
include: _default_template_mmivqdbench_option_yaml
metric_list:
- metric: gpt_eval_score
aggregation: !function utils.mmivqdbench_option
higher_is_better: true
15 changes: 15 additions & 0 deletions lmms_eval/tasks/mmupdbench/mmupdbench.yaml
@@ -0,0 +1,15 @@
group: mmupdbench
task:
- mmaadbench_base
- mmaadbench_option
- mmaadbench_instruction
- mmiasdbench_base
- mmiasdbench_option
- mmiasdbench_instruction
- mmivqdbench_base
- mmivqdbench_option
- mmivqdbench_instruction
metadata:
version: 0.0
sys_prompt: ""
gpt_eval_model_name: "gpt-3.5-turbo-0613"
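
The group metadata names gpt-3.5-turbo-0613 as the judge behind gpt_eval_score. One way such a judge call could look using the official OpenAI Python client; the grading prompt and the 'correct'/'incorrect' parsing rule are assumptions, not the evaluation code shipped in this commit:

# Hypothetical judge call for the model named in gpt_eval_model_name.
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

def judge_with_gpt(question, answer, prediction, model="gpt-3.5-turbo-0613"):
    response = client.chat.completions.create(
        model=model,
        temperature=0,
        messages=[
            {"role": "system", "content": "You grade multiple-choice answers."},
            {"role": "user", "content": (
                f"Question: {question}\n"
                f"Ground-truth answer: {answer}\n"
                f"Model prediction: {prediction}\n"
                "Reply with exactly 'correct' or 'incorrect'."
            )},
        ],
    )
    verdict = response.choices[0].message.content.strip().lower()
    return verdict.startswith("correct")

A faithful judge prompt for the UPD settings would also need to credit abstentions such as "F. None of the above" or "The image and question are irrelevant", which the option and instruction post-prompts above explicitly invite.
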
10 changes: 10 additions & 0 deletions lmms_eval/tasks/mmupdbench/mmupdbench_base.yaml
@@ -0,0 +1,10 @@
group: mmupdbench_base
task:
- mmaadbench_base
- mmiasdbench_base
- mmivqdbench_base
metadata:
version: 0.0
sys_prompt: ""
gpt_eval_model_name: "gpt-3.5-turbo-0613"
