Skip to content

Commit

Permalink
Merge pull request #128 from Dannoopsy/gqa-ru
Browse files Browse the repository at this point in the history
add task gqa-ru
  • Loading branch information
Luodian authored Jul 1, 2024
2 parents 11fd7e3 + a0de897 commit e19b43a
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 0 deletions.
29 changes: 29 additions & 0 deletions lmms_eval/tasks/gqa_ru/gqa_ru.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
dataset_path: deepvk/GQA-ru
dataset_name: testdev_balanced_instructions
dataset_kwargs:
  token: True
task: "gqa-ru"
test_split: testdev
output_type: generate_until
doc_to_visual: !function utils.gqa_doc_to_visual
doc_to_text: !function utils.gqa_doc_to_text
doc_to_target: "answer"
generation_kwargs:
  max_new_tokens: 16
  temperature: 0
  top_p: 1.0
  num_beams: 1
  do_sample: false
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
metadata:
  - version: 0.0

model_specific_prompt_kwargs:
  default:
    pre_prompt: ""
    post_prompt: "\nОтветь одним словом."
23 changes: 23 additions & 0 deletions lmms_eval/tasks/gqa_ru/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from datasets import load_dataset

# Lazily populated, process-wide caches used by gqa_doc_to_visual.
# GQA_RAW_IMAGE_DATASET holds the loaded image split; GQA_ID2IMAGE maps each
# row's "id" to its RGB-converted image. Both stay None until the first call.
GQA_RAW_IMAGE_DATASET = None
GQA_ID2IMAGE = None


def gqa_doc_to_visual(doc):
    """Return the image referenced by a GQA-ru document as a one-element list.

    On first use the "testdev_balanced_images" configuration of
    "deepvk/GQA-ru" (split "testdev") is loaded and every row's image is
    converted to RGB and indexed by the row's "id". Later calls only perform
    a dictionary lookup.

    Args:
        doc: Mapping with an "imageId" key matching a row "id" in the image
            dataset.

    Returns:
        A list containing the single cached RGB image for ``doc["imageId"]``.

    Raises:
        KeyError: If ``doc`` has no "imageId" or the id is not in the image
            dataset.
    """
    global GQA_RAW_IMAGE_DATASET
    global GQA_ID2IMAGE
    if GQA_RAW_IMAGE_DATASET is None or GQA_ID2IMAGE is None:
        dataset = GQA_RAW_IMAGE_DATASET
        if dataset is None:
            dataset = load_dataset(
                "deepvk/GQA-ru",
                "testdev_balanced_images",
                split="testdev",
                token=True,
            )
        # Build the index in a local first: if loading or a conversion fails
        # part-way, the globals are left untouched and the next call retries
        # instead of silently using a partially filled cache.
        # Rows are presumably PIL images (they expose .convert) — TODO confirm.
        id_to_image = {row["id"]: row["image"].convert("RGB") for row in dataset}
        GQA_RAW_IMAGE_DATASET = dataset
        GQA_ID2IMAGE = id_to_image
    image = GQA_ID2IMAGE[doc["imageId"]]
    return [image]


def gqa_doc_to_text(doc, model_specific_prompt_kwargs=None):
    """Build the text prompt for a GQA-ru document.

    The prompt is the document's question wrapped in the configured
    pre- and post-prompt strings.

    Args:
        doc: Mapping with a "question" key.
        model_specific_prompt_kwargs: Optional mapping that may provide
            "pre_prompt" and "post_prompt". A missing mapping or missing key
            is treated as an empty string, so a partial override does not
            raise.

    Returns:
        The string ``pre_prompt + question + post_prompt``.

    Raises:
        KeyError: If ``doc`` has no "question" key.
    """
    prompt_kwargs = model_specific_prompt_kwargs or {}
    question = doc["question"]
    pre_prompt = prompt_kwargs.get("pre_prompt", "")
    post_prompt = prompt_kwargs.get("post_prompt", "")
    return f"{pre_prompt}{question}{post_prompt}"

0 comments on commit e19b43a

Please sign in to comment.