Skip to content

Commit

Permalink
Add SPLADE++ ED w/ ONNX on BEIR (#2354)
Browse files Browse the repository at this point in the history
Everything works except for the ArguAna corpus, which is still being debugged.
  • Loading branch information
lintool authored Feb 12, 2024
1 parent 57d2627 commit 9a5bb60
Show file tree
Hide file tree
Showing 30 changed files with 1,600 additions and 3 deletions.
8 changes: 5 additions & 3 deletions src/main/python/run_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,8 +204,10 @@ def evaluate_and_verify(yaml_data, dry_run):
expected = round(model['results'][metric['metric']][i], metric['metric_precision'])
actual = round(float(eval_out), metric['metric_precision'])

using_hnsw = True if 'VectorQueryGenerator' in model['params'] or '-encoder' in model['params'] else False

# For HNSW (and, via using_hnsw, '-encoder' runs), we only print to the third digit
if 'VectorQueryGenerator' in model['params']:
if using_hnsw:
result_str = 'expected: {0:.3f} actual: {1:.3f} - metric: {2:<8} model: {3} topics: {4}'.format(
expected, actual, metric['metric'], model['name'], topic_set['id'])
else:
Expand All @@ -216,8 +218,8 @@ def evaluate_and_verify(yaml_data, dry_run):
# For HNSW, be more tolerant, but as long as the actual score is higher than the expected score,
# let the test pass.
if is_close(expected, actual) or \
('VectorQueryGenerator' in model['params'] and is_close(expected, actual, abs_tol=0.007)) or \
('VectorQueryGenerator' in model['params'] and actual > expected):
(using_hnsw and is_close(expected, actual, abs_tol=0.007)) or \
(using_hnsw and actual > expected):
logger.info(ok_str + result_str)
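# For ONNX runs, increase tolerance a bit because we observe some minor differences across OSes.
# NOTE(review): using_hnsw is already true whenever '-encoder' is in params, so the 0.007 tolerance
# above is tried first and this 0.001 branch looks unreachable - confirm against is_close().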
elif '-encoder' in model['params'] and is_close(expected, actual, abs_tol=0.001):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
---
# Regression configuration: SPLADE++ (CoCondenser-EnsembleDistil) impact index over BEIR v1.0.0 ArguAna.
# NOTE(review): the commit message says the arguana corpus is "still being debugged" - confirm the
# expected scores below before relying on them.
corpus: beir-v1.0.0-arguana-splade-pp-ed
corpus_path: collections/beir-v1.0.0/splade-pp-ed/arguana

index_path: indexes/lucene-index.beir-v1.0.0-arguana-splade-pp-ed/
collection_class: JsonVectorCollection
generator_class: DefaultLuceneDocumentGenerator
index_threads: 16
index_options: -impact -pretokenized
index_stats:
  documents: 8674
  documents (non-empty): 8674
  total terms: 71992355

# Each entry names the trec_eval invocation for one metric; metric_precision is the number of
# decimal digits that expected and actual scores are rounded to before being compared.
metrics:
  - metric: nDCG@10
    command: target/appassembler/bin/trec_eval
    params: -c -m ndcg_cut.10
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false
  - metric: R@100
    command: target/appassembler/bin/trec_eval
    params: -c -m recall.100
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false
  - metric: R@1000
    command: target/appassembler/bin/trec_eval
    params: -c -m recall.1000
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false

topic_reader: TsvString
topics:
  - name: "BEIR (v1.0.0): ArguAna"
    id: test
    path: topics.beir-v1.0.0-arguana.test.tsv.gz
    qrel: qrels.beir-v1.0.0-arguana.test.txt

models:
  - name: splade-pp-ed
    display: SPLADE++ (CoCondenser-EnsembleDistil)
    params: -impact -pretokenized -removeQuery -hits 1000 -encoder SpladePlusPlusEnsembleDistil
    # Expected scores keyed by metric name; presumably one list entry per topic set above.
    results:
      nDCG@10:
        - 0.5203
      R@100:
        - 0.9744
      R@1000:
        - 0.9950
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
---
# Regression configuration: SPLADE++ (CoCondenser-EnsembleDistil) impact index over BEIR v1.0.0 BioASQ.
corpus: beir-v1.0.0-bioasq-splade-pp-ed
corpus_path: collections/beir-v1.0.0/splade-pp-ed/bioasq

index_path: indexes/lucene-index.beir-v1.0.0-bioasq-splade-pp-ed/
collection_class: JsonVectorCollection
generator_class: DefaultLuceneDocumentGenerator
index_threads: 16
index_options: -impact -pretokenized
index_stats:
  documents: 14914603
  documents (non-empty): 14914603
  total terms: 127381306317

# Each entry names the trec_eval invocation for one metric; metric_precision is the number of
# decimal digits that expected and actual scores are rounded to before being compared.
metrics:
  - metric: nDCG@10
    command: target/appassembler/bin/trec_eval
    params: -c -m ndcg_cut.10
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false
  - metric: R@100
    command: target/appassembler/bin/trec_eval
    params: -c -m recall.100
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false
  - metric: R@1000
    command: target/appassembler/bin/trec_eval
    params: -c -m recall.1000
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false

topic_reader: TsvString
topics:
  - name: "BEIR (v1.0.0): BioASQ"
    id: test
    path: topics.beir-v1.0.0-bioasq.test.tsv.gz
    qrel: qrels.beir-v1.0.0-bioasq.test.txt

models:
  - name: splade-pp-ed
    display: SPLADE++ (CoCondenser-EnsembleDistil)
    params: -impact -pretokenized -removeQuery -hits 1000 -encoder SpladePlusPlusEnsembleDistil
    # Expected scores keyed by metric name; presumably one list entry per topic set above.
    results:
      nDCG@10:
        - 0.4980
      R@100:
        - 0.7385
      R@1000:
        - 0.8757
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
---
# Regression configuration: SPLADE++ (CoCondenser-EnsembleDistil) impact index over BEIR v1.0.0 Climate-FEVER.
corpus: beir-v1.0.0-climate-fever-splade-pp-ed
corpus_path: collections/beir-v1.0.0/splade-pp-ed/climate-fever

index_path: indexes/lucene-index.beir-v1.0.0-climate-fever-splade-pp-ed/
collection_class: JsonVectorCollection
generator_class: DefaultLuceneDocumentGenerator
index_threads: 16
index_options: -impact -pretokenized
index_stats:
  documents: 5416593
  documents (non-empty): 5416593
  total terms: 28498465299

# Each entry names the trec_eval invocation for one metric; metric_precision is the number of
# decimal digits that expected and actual scores are rounded to before being compared.
metrics:
  - metric: nDCG@10
    command: target/appassembler/bin/trec_eval
    params: -c -m ndcg_cut.10
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false
  - metric: R@100
    command: target/appassembler/bin/trec_eval
    params: -c -m recall.100
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false
  - metric: R@1000
    command: target/appassembler/bin/trec_eval
    params: -c -m recall.1000
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false

topic_reader: TsvString
topics:
  - name: "BEIR (v1.0.0): Climate-FEVER"
    id: test
    path: topics.beir-v1.0.0-climate-fever.test.tsv.gz
    qrel: qrels.beir-v1.0.0-climate-fever.test.txt

models:
  - name: splade-pp-ed
    display: SPLADE++ (CoCondenser-EnsembleDistil)
    params: -impact -pretokenized -removeQuery -hits 1000 -encoder SpladePlusPlusEnsembleDistil
    # Expected scores keyed by metric name; presumably one list entry per topic set above.
    results:
      nDCG@10:
        - 0.2297
      R@100:
        - 0.5211
      R@1000:
        - 0.7183
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
---
# Regression configuration: SPLADE++ (CoCondenser-EnsembleDistil) impact index over BEIR v1.0.0 CQADupStack-android.
corpus: beir-v1.0.0-cqadupstack-android-splade-pp-ed
corpus_path: collections/beir-v1.0.0/splade-pp-ed/cqadupstack-android

index_path: indexes/lucene-index.beir-v1.0.0-cqadupstack-android-splade-pp-ed/
collection_class: JsonVectorCollection
generator_class: DefaultLuceneDocumentGenerator
index_threads: 16
index_options: -impact -pretokenized
index_stats:
  documents: 22998
  documents (non-empty): 22998
  total terms: 108476959

# Each entry names the trec_eval invocation for one metric; metric_precision is the number of
# decimal digits that expected and actual scores are rounded to before being compared.
metrics:
  - metric: nDCG@10
    command: target/appassembler/bin/trec_eval
    params: -c -m ndcg_cut.10
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false
  - metric: R@100
    command: target/appassembler/bin/trec_eval
    params: -c -m recall.100
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false
  - metric: R@1000
    command: target/appassembler/bin/trec_eval
    params: -c -m recall.1000
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false

topic_reader: TsvString
topics:
  - name: "BEIR (v1.0.0): CQADupStack-android"
    id: test
    path: topics.beir-v1.0.0-cqadupstack-android.test.tsv.gz
    qrel: qrels.beir-v1.0.0-cqadupstack-android.test.txt

models:
  - name: splade-pp-ed
    display: SPLADE++ (CoCondenser-EnsembleDistil)
    params: -impact -pretokenized -removeQuery -hits 1000 -encoder SpladePlusPlusEnsembleDistil
    # Expected scores keyed by metric name; presumably one list entry per topic set above.
    results:
      nDCG@10:
        - 0.3904
      R@100:
        - 0.7404
      R@1000:
        - 0.9064
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
---
# Regression configuration: SPLADE++ (CoCondenser-EnsembleDistil) impact index over BEIR v1.0.0 CQADupStack-english.
corpus: beir-v1.0.0-cqadupstack-english-splade-pp-ed
corpus_path: collections/beir-v1.0.0/splade-pp-ed/cqadupstack-english

index_path: indexes/lucene-index.beir-v1.0.0-cqadupstack-english-splade-pp-ed/
collection_class: JsonVectorCollection
generator_class: DefaultLuceneDocumentGenerator
index_threads: 16
index_options: -impact -pretokenized
index_stats:
  documents: 40221
  documents (non-empty): 40221
  total terms: 158861979

# Each entry names the trec_eval invocation for one metric; metric_precision is the number of
# decimal digits that expected and actual scores are rounded to before being compared.
metrics:
  - metric: nDCG@10
    command: target/appassembler/bin/trec_eval
    params: -c -m ndcg_cut.10
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false
  - metric: R@100
    command: target/appassembler/bin/trec_eval
    params: -c -m recall.100
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false
  - metric: R@1000
    command: target/appassembler/bin/trec_eval
    params: -c -m recall.1000
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false

topic_reader: TsvString
topics:
  - name: "BEIR (v1.0.0): CQADupStack-english"
    id: test
    path: topics.beir-v1.0.0-cqadupstack-english.test.tsv.gz
    qrel: qrels.beir-v1.0.0-cqadupstack-english.test.txt

models:
  - name: splade-pp-ed
    display: SPLADE++ (CoCondenser-EnsembleDistil)
    params: -impact -pretokenized -removeQuery -hits 1000 -encoder SpladePlusPlusEnsembleDistil
    # Expected scores keyed by metric name; presumably one list entry per topic set above.
    results:
      nDCG@10:
        - 0.4079
      R@100:
        - 0.6946
      R@1000:
        - 0.8454
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
---
# Regression configuration: SPLADE++ (CoCondenser-EnsembleDistil) impact index over BEIR v1.0.0 CQADupStack-gaming.
corpus: beir-v1.0.0-cqadupstack-gaming-splade-pp-ed
corpus_path: collections/beir-v1.0.0/splade-pp-ed/cqadupstack-gaming

index_path: indexes/lucene-index.beir-v1.0.0-cqadupstack-gaming-splade-pp-ed/
collection_class: JsonVectorCollection
generator_class: DefaultLuceneDocumentGenerator
index_threads: 16
index_options: -impact -pretokenized
index_stats:
  documents: 45301
  documents (non-empty): 45301
  total terms: 197713644

# Each entry names the trec_eval invocation for one metric; metric_precision is the number of
# decimal digits that expected and actual scores are rounded to before being compared.
metrics:
  - metric: nDCG@10
    command: target/appassembler/bin/trec_eval
    params: -c -m ndcg_cut.10
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false
  - metric: R@100
    command: target/appassembler/bin/trec_eval
    params: -c -m recall.100
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false
  - metric: R@1000
    command: target/appassembler/bin/trec_eval
    params: -c -m recall.1000
    separator: "\t"
    parse_index: 2
    metric_precision: 4
    can_combine: false

topic_reader: TsvString
topics:
  - name: "BEIR (v1.0.0): CQADupStack-gaming"
    id: test
    path: topics.beir-v1.0.0-cqadupstack-gaming.test.tsv.gz
    qrel: qrels.beir-v1.0.0-cqadupstack-gaming.test.txt

models:
  - name: splade-pp-ed
    display: SPLADE++ (CoCondenser-EnsembleDistil)
    params: -impact -pretokenized -removeQuery -hits 1000 -encoder SpladePlusPlusEnsembleDistil
    # Expected scores keyed by metric name; presumably one list entry per topic set above.
    results:
      nDCG@10:
        - 0.4957
      R@100:
        - 0.8131
      R@1000:
        - 0.9221
Loading

0 comments on commit 9a5bb60

Please sign in to comment.