diff --git a/docs/regressions-msmarco-doc-hgf-wp.md b/docs/regressions-msmarco-doc-hgf-wp.md index f868b0c5ff..7da7fa3567 100644 --- a/docs/regressions-msmarco-doc-hgf-wp.md +++ b/docs/regressions-msmarco-doc-hgf-wp.md @@ -44,19 +44,19 @@ After indexing has completed, you should be able to perform retrieval as follows ``` target/appassembler/bin/SearchCollection \ -index indexes/lucene-index.msmarco-doc-hgf-wp/ \ - -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.wp.tsv.gz \ + -topics src/main/resources/topics-and-qrels/topics.msmarco-doc.dev.txt \ -topicreader TsvInt \ - -output runs/run.msmarco-doc.bm25-default.topics.msmarco-doc.dev.wp.txt \ + -output runs/run.msmarco-doc.bm25-default.topics.msmarco-doc.dev.txt \ -bm25 -analyzeWithHuggingFaceTokenizer bert-base-uncased & ``` Evaluation can be performed using `trec_eval`: ``` -tools/eval/trec_eval.9.0.4/trec_eval -c -m map src/main/resources/topics-and-qrels/qrels.msmarco-doc.dev.txt runs/run.msmarco-doc.bm25-default.topics.msmarco-doc.dev.wp.txt -tools/eval/trec_eval.9.0.4/trec_eval -c -M 100 -m recip_rank src/main/resources/topics-and-qrels/qrels.msmarco-doc.dev.txt runs/run.msmarco-doc.bm25-default.topics.msmarco-doc.dev.wp.txt -tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.100 src/main/resources/topics-and-qrels/qrels.msmarco-doc.dev.txt runs/run.msmarco-doc.bm25-default.topics.msmarco-doc.dev.wp.txt -tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.1000 src/main/resources/topics-and-qrels/qrels.msmarco-doc.dev.txt runs/run.msmarco-doc.bm25-default.topics.msmarco-doc.dev.wp.txt +tools/eval/trec_eval.9.0.4/trec_eval -c -m map src/main/resources/topics-and-qrels/qrels.msmarco-doc.dev.txt runs/run.msmarco-doc.bm25-default.topics.msmarco-doc.dev.txt +tools/eval/trec_eval.9.0.4/trec_eval -c -M 100 -m recip_rank src/main/resources/topics-and-qrels/qrels.msmarco-doc.dev.txt runs/run.msmarco-doc.bm25-default.topics.msmarco-doc.dev.txt +tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.100 src/main/resources/topics-and-qrels/qrels.msmarco-doc.dev.txt runs/run.msmarco-doc.bm25-default.topics.msmarco-doc.dev.txt +tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.1000 src/main/resources/topics-and-qrels/qrels.msmarco-doc.dev.txt runs/run.msmarco-doc.bm25-default.topics.msmarco-doc.dev.txt ``` ## Effectiveness diff --git a/src/main/resources/regression/mrtydi-v1.1-te.yaml b/src/main/resources/regression/mrtydi-v1.1-te.yaml index 5263098f6d..61726051ff 100644 --- a/src/main/resources/regression/mrtydi-v1.1-te.yaml +++ b/src/main/resources/regression/mrtydi-v1.1-te.yaml @@ -10,7 +10,7 @@ index_options: -storePositions -storeDocvectors -storeRaw -language te index_stats: documents: 548224 documents (non-empty): 548224 - total terms: 27173644 + total terms: 26812052 metrics: - metric: MRR@100 diff --git a/src/main/resources/regression/msmarco-doc-hgf-wp.yaml b/src/main/resources/regression/msmarco-doc-hgf-wp.yaml index 643afbfef7..82ca268b41 100644 --- a/src/main/resources/regression/msmarco-doc-hgf-wp.yaml +++ b/src/main/resources/regression/msmarco-doc-hgf-wp.yaml @@ -48,7 +48,7 @@ qrels_root: src/main/resources/topics-and-qrels/ topics: - name: "[MS MARCO Doc: Dev](https://github.com/microsoft/MSMARCO-Document-Ranking)" id: dev - path: topics.msmarco-doc.dev.wp.tsv.gz + path: topics.msmarco-doc.dev.txt qrel: qrels.msmarco-doc.dev.txt models: