Skip to content

Commit

Permalink
Add code to enable compilation of submission for WebSRC test split
Browse files Browse the repository at this point in the history
  • Loading branch information
hunterheiden committed May 1, 2024
1 parent 7687495 commit 9bca441
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 3 deletions.
15 changes: 12 additions & 3 deletions lmms_eval/tasks/websrc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,19 +39,28 @@ def websrc_process_results(doc, results):
pred = results[0]
parsed_pred = pred
id = doc["page_id"]
websrc_ans = {"id": id, "domain": doc['domain'], "answer": doc["answer"], "parsed_pred": parsed_pred}
websrc_ans = {"id": id, "domain": doc['domain'], "parsed_pred": parsed_pred}
if "answer" in doc:
websrc_ans["answer"] = doc["answer"]

if 'id' in doc:
websrc_ans['question_id'] = doc['id']

return {
"websrc_squad_f1": websrc_ans,
"submission": {
id: pred,
websrc_ans['question_id']: pred,
},
}


def websrc_test_aggregate_results_for_submission(results, args):
# Aggregation hook for the "submission" metric: writes the collected results to
# "websrc_test_for_submission.json" (path obtained from generate_submission_file)
# and logs the destination path.
# NOTE(review): this is a rendered diff with indentation and +/- markers stripped.
# The bare json.dump(results, f) below appears to be the line this commit removes,
# superseded by the merge loop and the indent=4 dump — confirm against the repository.
path = generate_submission_file("websrc_test_for_submission.json", args)
with open(path, "w") as f:
json.dump(results, f)
# Fold every entry of results into one flat dict; a key seen again overwrites the earlier value.
# Presumably each entry is the {question_id: prediction} dict emitted under "submission" — verify.
out = {}
for result in results:
out.update(result)
json.dump(out, f, indent=4)
lmms_logger.info(f"Results saved to {path}.")


Expand Down
1 change: 1 addition & 0 deletions lmms_eval/tasks/websrc/websrc.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
group: websrc
task:
- websrc_val
- websrc_test
19 changes: 19 additions & 0 deletions lmms_eval/tasks/websrc/websrc_test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
dataset_path: rootsautomation/websrc-test
task: "websrc_test"
test_split: test
output_type: generate_until
doc_to_visual: !function utils.websrc_doc_to_visual
doc_to_text: !function utils.websrc_doc_to_text
doc_to_target: "answer"
# The return value of process_results will be used by metrics
process_results: !function utils.websrc_process_results
# Note that the metric name can be either a registered metric function (as is the case for GQA) or a key name returned by process_results
generation_kwargs:
max_new_tokens: 16
image_aspect_ratio: pad
metric_list:
- metric: submission
aggregation: !function utils.websrc_test_aggregate_results_for_submission
higher_is_better: true
metadata:
- version: 0.0

0 comments on commit 9bca441

Please sign in to comment.