Skip to content

Commit

Permalink
Tweak script for summarizing BEIR results (#2363)
Browse files Browse the repository at this point in the history
  • Loading branch information
lintool authored Feb 8, 2024
1 parent 88e6964 commit 6457a9e
Showing 1 changed file with 40 additions and 3 deletions.
43 changes: 40 additions & 3 deletions src/main/python/beir/gather_beir_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,28 @@
'scifact'
]

models = ['flat', 'multifield', 'unicoil-noexp', 'splade-distil-cocodenser-medium']
# Maps each BEIR dataset key to the display name printed in the first column
# of the ' & '-delimited summary rows emitted at the end of this script.
# Iteration order of this dict determines the row order of that output.
beir_final_keys = {
    'trec-covid': 'TREC-COVID',
    'bioasq': 'BioASQ',
    'nfcorpus': 'NFCorpus',
    'nq': 'NQ',
    'hotpotqa': 'HotpotQA',
    'fiqa': 'FiQA-2018',
    'signal1m': 'Signal-1M',
    'trec-news': 'TREC-NEWS',
    'robust04': 'Robust04',
    'arguana': 'ArguAna',
    # The accent belongs on the final "e" (Touché), not on the "o".
    'webis-touche2020': 'Touché-2020',
    'cqadupstack': 'CQADupStack',
    'quora': 'Quora',
    'dbpedia-entity': 'DBPedia',
    'scidocs': 'SCIDOCS',
    'fever': 'FEVER',
    'climate-fever': 'Climate-FEVER',
    'scifact': 'SciFact',
}

# Retrieval conditions that are summarized, in the order their columns print.
models = [
    'flat',
    'multifield',
    'unicoil-noexp',
    'splade-pp-ed',
    'bge-base-en-v1.5-hnsw',
]
# Effectiveness metrics reported for every condition.
metrics = ['nDCG@10', 'R@100', 'R@1000']

# Three-level lookup indexed as table[dataset][model][metric]; any entry that
# was never assigned reads back as 0.0 instead of raising KeyError.
table = defaultdict(lambda: defaultdict(lambda: defaultdict(float)))
Expand Down Expand Up @@ -92,11 +113,27 @@
print(f'{key:25}{table[key]["flat"][metric]:.4f} ' +
f'{table[key]["multifield"][metric]:.4f} ' +
f'{table[key]["unicoil-noexp"][metric]:.4f} ' +
f'{table[key]["splade-distil-cocodenser-medium"][metric]:.4f}')
f'{table[key]["splade-pp-ed"][metric]:.4f} ' +
f'{table[key]["bge-base-en-v1.5-hnsw"][metric]:.4f}')

# Rule between the per-dataset rows and the summary row: one 6-character dash
# group per printed model column (five columns), offset by the 25-character
# dataset-name column so the dashes sit under the '0.xxxx' scores.
print(' ' * 25 + ' '.join(['-' * 6] * 5))
print(' ' * 25 + f'{final_scores["flat"][metric]:0.4f} ' +
f'{final_scores["multifield"][metric]:0.4f} ' +
f'{final_scores["unicoil-noexp"][metric]:0.4f} ' +
f'{final_scores["splade-distil-cocodenser-medium"][metric]:0.4f}')
f'{final_scores["splade-pp-ed"][metric]:0.4f} ' +
f'{final_scores["bge-base-en-v1.5-hnsw"][metric]:.4f}')
print('\n')

# Emit ' & '-delimited rows (each terminated by ' \\'), one per dataset in
# beir_final_keys, followed by a summary row taken from final_scores.
# Columns are nDCG@10 for each model below, then R@100 for each model.
latex_models = ('flat', 'splade-pp-ed', 'bge-base-en-v1.5-hnsw')
latex_metrics = ('nDCG@10', 'R@100')

for key in beir_final_keys:
    if key == 'cqadupstack':
        # CQADupStack is reported from the accumulated sums divided by 12
        # (presumably the number of sub-collections -- confirm against the
        # code that fills cqadupstack_sums).
        row_scores = [cqadupstack_sums[model][metric] / 12
                      for metric in latex_metrics
                      for model in latex_models]
    else:
        row_scores = [table[key][model][metric]
                      for metric in latex_metrics
                      for model in latex_models]
    print(f'{beir_final_keys[key]:25} & ' +
          ' & '.join(f'{score:.3f}' for score in row_scores) +
          ' \\\\')

# Summary row: blank name column, then the final score of each model/metric.
print(' ' * 25 + ' & ' +
      ' & '.join(f'{final_scores[model][metric]:.3f}'
                 for metric in latex_metrics
                 for model in latex_models) +
      ' \\\\')

0 comments on commit 6457a9e

Please sign in to comment.