From 259d8ecedbba833386f9300a2667ef61b20943d8 Mon Sep 17 00:00:00 2001 From: Kelvin Jiang <20145768+kelvin-jiang@users.noreply.github.com> Date: Sun, 30 May 2021 16:33:37 -0400 Subject: [PATCH] Fixed bug in FEVER experiment regarding trec_eval params (#1552) --- docs/experiments-fever.md | 10 +++++----- src/main/python/fever/tune_bm25.py | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/experiments-fever.md b/docs/experiments-fever.md index 1bd03bb5b2..35e195263d 100644 --- a/docs/experiments-fever.md +++ b/docs/experiments-fever.md @@ -119,10 +119,10 @@ This run produces the following results: |:----|----------------:|----------------:| | 1 | 0.3887 | 0.5925 | | 5 | 0.6517 | 0.7678 | -| 10 | 0.7349 | 0.8233 | -| 25 | 0.8117 | 0.8745 | -| 50 | 0.8570 | 0.9047 | -| 100 | 0.8900 | 0.9267 | +| 10 | 0.7349 | 0.8233 | +| 25 | 0.8117 | 0.8745 | +| 50 | 0.8570 | 0.9047 | +| 100 | 0.8900 | 0.9267 | Note that this outperforms the TF-IDF baseline in the FEVER paper at every value of k. @@ -158,7 +158,7 @@ python src/main/python/fever/tune_bm25.py \ --runs_folder runs/fever-bm25 \ --index_folder indexes/fever/lucene-index-fever-paragraph \ --queries_file collections/fever/queries.paragraph.train-subset.tsv \ - --qrels_file collections/fever/qrels.paragraph.train-subset.tsv + --qrels_file collections/fever/qrels.paragraph.train-subset.txt ``` From the grid search, we observe that the parameters `k1=0.9`, `b=0.1` perform fairly well. If we retrieve on the dev set with these parameters: diff --git a/src/main/python/fever/tune_bm25.py b/src/main/python/fever/tune_bm25.py index 07da6ab90f..d76800f8bf 100644 --- a/src/main/python/fever/tune_bm25.py +++ b/src/main/python/fever/tune_bm25.py @@ -44,10 +44,10 @@ def evaluate_runs(args): run_file = os.path.join(args.runs_folder, file) # evaluate with trec_eval results = subprocess.check_output(['tools/eval/trec_eval.9.0.4/trec_eval', + '-mrecall.100', + '-mmap', args.qrels_file, - run_file, - '-m recall.100', - '-m map']) + run_file]) # regex match trec_eval output to get metrics match = re.search('map +\tall\t([0-9.]+)', results.decode('utf-8')) map = float(match.group(1))