Update eval: label leaderboard columns with metric abbreviations (qAcc, fAcc, easy aAcc, hard aAcc, aAcc)

Lichang-Chen · Nov 16, 2023 · 3733d9c
1 parent d0114ff
commit 3733d9c
Show file tree

Hide file tree

Showing 2 changed files with 2 additions and 2 deletions.
diff --git a/evaluation.py b/evaluation.py
@@ -128,7 +128,7 @@ def generate_answer(data, model_output_entry):
 
     print("##### Leaderboard Stats #####")
 
-    table = [["", "Acc per question pair", "Acc per figure", "Acc per easy question", "Acc per hard question", "Acc per question"], 
+    table = [["", "Acc per question pair (qAcc)", "Acc per figure (fAcc)", "Acc per easy question (easy aAcc)", "Acc per hard question (hard aAcc)", "Acc per question (aAcc)"], 
               ["GPT Eval", pair_acc_gpt, figure_acc_gpt, easy_acc_gpt, hard_acc_gpt, q_acc_gpt]]
     leaderboard = PrettyTable(table[0])
     leaderboard.add_rows(table[1:])

diff --git a/gpt4v_benchmark.py b/gpt4v_benchmark.py
@@ -232,7 +232,7 @@ def generate_answer(data, model_output_entry):
 
     print("##### Leaderboard Stats #####")
 
-    table = [["", "Acc per question pair", "Acc per figure", "Acc per easy question", "Acc per hard question", "Acc per question"], 
+    table = [["", "Acc per question pair (qAcc)", "Acc per figure (fAcc)", "Acc per easy question (easy aAcc)", "Acc per hard question (hard aAcc)", "Acc per question (aAcc)"], 
               ["Human Eval", pair_acc_human, figure_acc_human, easy_acc_human, hard_acc_human, q_acc_human], 
               ["GPT Eval", pair_acc_gpt, figure_acc_gpt, easy_acc_gpt, hard_acc_gpt, q_acc_gpt]]
     leaderboard = PrettyTable(table[0])