Skip to content

Commit

Permalink
update irst unittest with doc (#1122)
Browse files Browse the repository at this point in the history
* update irst unitest with dl19/dl20 msmarco passage/doc-full/doc-segment
  • Loading branch information
stephaniewhoo authored Apr 20, 2022
1 parent bae159f commit 53333e0
Showing 1 changed file with 333 additions and 24 deletions.
357 changes: 333 additions & 24 deletions integrations/sparse/test_lucenesearcher_check_irst.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,25 @@ def setUp(self):
ibm_model_tar_name = 'ibm_model_1_bert_tok_20211117.tar.gz'
os.system(f'wget {ibm_model_url} -P irst_test/')
os.system(f'tar -xzvf irst_test/{ibm_model_tar_name} -C irst_test')
# qrel
self.qrels_path = f'{self.pyserini_root}/tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt'

def test_sum_aggregation(self):
os.system('python -m pyserini.search.lucene.irst \
--topics ./tools/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \
--tran-path irst_test/ibm_model_1_bert_tok_20211117/ \
--index msmarco-passage-ltr \
--output irst_test/regression_test_sum.txt \
#wp term stat
wp_term_url = 'https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/bert_wp_term_freq.msmarco-passage.20220411.pickle'
os.system(f'wget {wp_term_url} -P irst_test/')
self.dl19_pass = 'tools/topics-and-qrels/topics.dl19-passage.txt'
self.dl20 = 'tools/topics-and-qrels/topics.dl20.txt'

def test_sum_aggregation_dl19_passage(self):
#dl19 passage
topic = 'dl19-passage'
os.system(f'python -m pyserini.search.lucene.irst \
--topics {self.dl19_pass} \
--translation-model irst_test/ibm_model_1_bert_tok_20211117/ \
--index msmarco-v1-passage \
--output irst_test/regression_test_sum.{topic}.txt \
--wp-stat irst_test/bert_wp_term_freq.msmarco-passage.20220411.pickle \
--alpha 0.1 ')

score_cmd = f'{self.pyserini_root}/tools/eval/trec_eval.9.0.4/trec_eval \
-c -M1000 -m map -m ndcg_cut.20 {self.qrels_path} irst_test/regression_test_sum.txt'
score_cmd = f'python -m pyserini.eval.trec_eval \
-c -m map -m ndcg_cut.10 -l 2 {topic} irst_test/regression_test_sum.{topic}.txt'

status = os.system(score_cmd)
stdout, stderr = run_command(score_cmd)
Expand All @@ -57,20 +63,270 @@ def test_sum_aggregation(self):

self.assertEqual(status, 0)
self.assertEqual(stderr, '')
self.assertEqual(map_score, 0.2294)
self.assertEqual(ndcg_score, 0.2997)

def test_max_aggregation(self):
os.system('python -m pyserini.search.lucene.irst \
--topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \
--tran-path irst_test/ibm_model_1_bert_tok_20211117/ \
--index msmarco-passage-ltr \
--output irst_test/regression_test_max.txt \
self.assertEqual(map_score, 0.3281)
self.assertEqual(ndcg_score, 0.5260)

def test_sum_aggregation_dl20_passage(self):
#dl20 passage
topic = 'dl20-passage'
os.system(f'python -m pyserini.search.lucene.irst \
--topics {self.dl20} \
--translation-model irst_test/ibm_model_1_bert_tok_20211117/ \
--index msmarco-v1-passage \
--output irst_test/regression_test_sum.{topic}.txt \
--wp-stat irst_test/bert_wp_term_freq.msmarco-passage.20220411.pickle \
--alpha 0.1 ')

score_cmd = f'python -m pyserini.eval.trec_eval \
-c -m map -m ndcg_cut.10 -l 2 {topic} irst_test/regression_test_sum.{topic}.txt'

status = os.system(score_cmd)
stdout, stderr = run_command(score_cmd)
map_score = parse_score(stdout, "map")
ndcg_score = parse_score(stdout, "ndcg")

self.assertEqual(status, 0)
self.assertEqual(stderr, '')
self.assertEqual(map_score, 0.3520)
self.assertEqual(ndcg_score, 0.5578)

def test_max_aggregation_dl19(self):
#dl19 passage
topic = 'dl19-passage'

os.system(f'python -m pyserini.search.lucene.irst \
--topics {self.dl19_pass} \
--translation-model irst_test/ibm_model_1_bert_tok_20211117/ \
--index msmarco-v1-passage \
--output irst_test/regression_test_max.{topic}.txt \
--wp-stat irst_test/bert_wp_term_freq.msmarco-passage.20220411.pickle \
--alpha 0.3 \
--max-sim ')
score_cmd = f'python -m pyserini.eval.trec_eval \
-c -m map -m ndcg_cut.10 -l 2 {topic} irst_test/regression_test_max.{topic}.txt'

status = os.system(score_cmd)
stdout, stderr = run_command(score_cmd)
map_score = parse_score(stdout, "map")
ndcg_score = parse_score(stdout, "ndcg")

self.assertEqual(status, 0)
self.assertEqual(stderr, '')
self.assertEqual(map_score, 0.3286)
self.assertEqual(ndcg_score, 0.5371)


def test_max_aggregation_dl20_passage(self):
#dl20 passage
topic = 'dl20-passage'
os.system(f'python -m pyserini.search.lucene.irst \
--topics {self.dl20} \
--translation-model irst_test/ibm_model_1_bert_tok_20211117/ \
--index msmarco-v1-passage \
--output irst_test/regression_test_max.{topic}.txt \
--wp-stat irst_test/bert_wp_term_freq.msmarco-passage.20220411.pickle \
--alpha 0.3 \
--max-sim')

score_cmd = f'python -m pyserini.eval.trec_eval \
-c -m map -m ndcg_cut.10 -l 2 {topic} irst_test/regression_test_max.{topic}.txt'

status = os.system(score_cmd)
stdout, stderr = run_command(score_cmd)
map_score = parse_score(stdout, "map")
ndcg_score = parse_score(stdout, "ndcg")

self.assertEqual(status, 0)
self.assertEqual(stderr, '')
self.assertEqual(map_score, 0.3357)
self.assertEqual(ndcg_score, 0.5469)

def tearDown(self):
rmtree('irst_test/')


class TestMsmarcoDocumentIrst(unittest.TestCase):
def setUp(self):
curdir = os.getcwd()
if curdir.endswith('sparse'):
self.pyserini_root = '../..'
else:
self.pyserini_root = '.'
if(os.path.isdir('irst_test')):
rmtree('irst_test')
os.mkdir('irst_test')
# ibm model
ibm_model_url = 'https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/pyserini-models/ibm_model_1_bert_tok_20211117.tar.gz'
ibm_model_tar_name = 'ibm_model_1_bert_tok_20211117.tar.gz'
os.system(f'wget {ibm_model_url} -P irst_test/')
os.system(f'tar -xzvf irst_test/{ibm_model_tar_name} -C irst_test')
#wp term stat
wp_term_url = 'https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/bert_wp_term_freq.msmarco-doc.20220411.pickle'
os.system(f'wget {wp_term_url} -P irst_test/')
self.dl19_doc = 'tools/topics-and-qrels/topics.dl19-doc.txt'
self.dl20 = 'tools/topics-and-qrels/topics.dl20.txt'

def test_sum_aggregation_dl19_doc(self):
#dl19
topic = 'dl19-doc'
os.system(f'python -m pyserini.search.lucene.irst \
--topics {self.dl19_doc} \
--translation-model irst_test/ibm_model_1_bert_tok_20211117/ \
--index msmarco-v1-doc \
--output irst_test/regression_test_sum.{topic}.txt \
--wp-stat irst_test/bert_wp_term_freq.msmarco-doc.20220411.pickle \
--alpha 0.3')

score_cmd = f'python -m pyserini.eval.trec_eval \
-c -m map -m ndcg_cut.10 -M 100 {topic} irst_test/regression_test_sum.{topic}.txt'

status = os.system(score_cmd)
stdout, stderr = run_command(score_cmd)
map_score = parse_score(stdout, "map")
ndcg_score = parse_score(stdout, "ndcg")

self.assertEqual(status, 0)
self.assertEqual(stderr, '')
self.assertEqual(map_score, 0.2524)
self.assertEqual(ndcg_score, 0.5494)

def test_sum_aggregation_dl20_doc(self):
topic = 'dl20-doc'
os.system(f'python -m pyserini.search.lucene.irst \
--topics {self.dl20} \
--translation-model irst_test/ibm_model_1_bert_tok_20211117/ \
--index msmarco-v1-doc \
--output irst_test/regression_test_sum.{topic}.txt \
--wp-stat irst_test/bert_wp_term_freq.msmarco-doc.20220411.pickle \
--alpha 0.3 ')

score_cmd = f'python -m pyserini.eval.trec_eval \
-c -m map -m ndcg_cut.10 -M 100 {topic} irst_test/regression_test_sum.{topic}.txt'

status = os.system(score_cmd)
stdout, stderr = run_command(score_cmd)
map_score = parse_score(stdout, "map")
ndcg_score = parse_score(stdout, "ndcg")

self.assertEqual(status, 0)
self.assertEqual(stderr, '')
self.assertEqual(map_score, 0.3825)
self.assertEqual(ndcg_score, 0.5559)

def test_max_aggregation_dl19_doc(self):
#dl19
topic = 'dl19-doc'
os.system(f'python -m pyserini.search.lucene.irst \
--topics {self.dl19_doc} \
--translation-model irst_test/ibm_model_1_bert_tok_20211117/ \
--index msmarco-v1-doc \
--output irst_test/regression_test_max.{topic}.txt \
--wp-stat irst_test/bert_wp_term_freq.msmarco-doc.20220411.pickle \
--alpha 0.3 \
--max-sim')

score_cmd = f'python -m pyserini.eval.trec_eval \
-c -m map -m ndcg_cut.10 -M 100 {topic} irst_test/regression_test_max.{topic}.txt'

status = os.system(score_cmd)
stdout, stderr = run_command(score_cmd)
map_score = parse_score(stdout, "map")
ndcg_score = parse_score(stdout, "ndcg")

self.assertEqual(status, 0)
self.assertEqual(stderr, '')
self.assertEqual(map_score, 0.2204)
self.assertEqual(ndcg_score, 0.4912)

def test_max_aggregation_dl20_doc(self):
#dl20
topic = 'dl20-doc'
os.system(f'python -m pyserini.search.lucene.irst \
--topics {self.dl20} \
--translation-model irst_test/ibm_model_1_bert_tok_20211117/ \
--index msmarco-v1-doc \
--output irst_test/regression_test_max.{topic}.txt \
--wp-stat irst_test/bert_wp_term_freq.msmarco-doc.20220411.pickle \
--alpha 0.3 \
--max-sim')

score_cmd = f'python -m pyserini.eval.trec_eval \
-c -m map -m ndcg_cut.10 -M 100 {topic} irst_test/regression_test_max.{topic}.txt'

status = os.system(score_cmd)
stdout, stderr = run_command(score_cmd)
map_score = parse_score(stdout, "map")
ndcg_score = parse_score(stdout, "ndcg")

self.assertEqual(status, 0)
self.assertEqual(stderr, '')
self.assertEqual(map_score, 0.3373)
self.assertEqual(ndcg_score, 0.5015)

score_cmd = f'{self.pyserini_root}/tools/eval/trec_eval.9.0.4/trec_eval \
-c -M1000 -m map -m ndcg_cut.20 {self.qrels_path} irst_test/regression_test_max.txt'
def tearDown(self):
rmtree('irst_test/')


class TestMsmarcoDocumentSegIrst(unittest.TestCase):
def setUp(self):
curdir = os.getcwd()
if curdir.endswith('sparse'):
self.pyserini_root = '../..'
else:
self.pyserini_root = '.'
if(os.path.isdir('irst_test')):
rmtree('irst_test')
os.mkdir('irst_test')
# ibm model
ibm_model_url = 'https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/pyserini-models/ibm_model_1_bert_tok_20211117.tar.gz'
ibm_model_tar_name = 'ibm_model_1_bert_tok_20211117.tar.gz'
os.system(f'wget {ibm_model_url} -P irst_test/')
os.system(f'tar -xzvf irst_test/{ibm_model_tar_name} -C irst_test')
#wp term stat
wp_term_url = 'https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/bert_wp_term_freq.msmarco-doc-segmented.20220411.pickle'
os.system(f'wget {wp_term_url} -P irst_test/')
self.dl19_doc = 'tools/topics-and-qrels/topics.dl19-doc.txt'
self.dl20 = 'tools/topics-and-qrels/topics.dl20.txt'

def test_sum_aggregation_dl19_doc_seg(self):
#dl19
topic = 'dl19-doc'
os.system(f'python -m pyserini.search.lucene.irst \
--topics {self.dl19_doc} \
--translation-model irst_test/ibm_model_1_bert_tok_20211117/ \
--index msmarco-v1-doc-segmented \
--output irst_test/regression_test_sum.{topic}.txt \
--wp-stat irst_test/bert_wp_term_freq.msmarco-doc-segmented.20220411.pickle \
--hits 10000 --segments \
--alpha 0.3')

score_cmd = f'python -m pyserini.eval.trec_eval \
-c -m map -m ndcg_cut.10 -M 100 {topic} irst_test/regression_test_sum.{topic}.txt'

status = os.system(score_cmd)
stdout, stderr = run_command(score_cmd)
map_score = parse_score(stdout, "map")
ndcg_score = parse_score(stdout, "ndcg")

self.assertEqual(status, 0)
self.assertEqual(stderr, '')
self.assertEqual(map_score, 0.2711)
self.assertEqual(ndcg_score, 0.5596)

def test_sum_aggregation_dl20_doc_seg(self):
#dl20
topic = 'dl20-doc'
os.system(f'python -m pyserini.search.lucene.irst \
--topics {self.dl20} \
--translation-model irst_test/ibm_model_1_bert_tok_20211117/ \
--index msmarco-v1-doc-segmented \
--output irst_test/regression_test_sum.{topic}.txt \
--wp-stat irst_test/bert_wp_term_freq.msmarco-doc-segmented.20220411.pickle \
--hits 10000 --segments \
--alpha 0.3 ')

score_cmd = f'python -m pyserini.eval.trec_eval \
-c -m map -m ndcg_cut.10 -M 100 {topic} irst_test/regression_test_sum.{topic}.txt'

status = os.system(score_cmd)
stdout, stderr = run_command(score_cmd)
Expand All @@ -79,12 +335,65 @@ def test_max_aggregation(self):

self.assertEqual(status, 0)
self.assertEqual(stderr, '')
self.assertEqual(map_score, 0.2234)
self.assertEqual(ndcg_score, 0.2907)
self.assertEqual(map_score, 0.3759)
self.assertEqual(ndcg_score, 0.5343)

def test_max_aggregation_dl19_doc_seg(self):
#dl19
topic = 'dl19-doc'
os.system(f'python -m pyserini.search.lucene.irst \
--topics {self.dl19_doc} \
--translation-model irst_test/ibm_model_1_bert_tok_20211117/ \
--index msmarco-v1-doc-segmented \
--output irst_test/regression_test_max.{topic}.txt \
--wp-stat irst_test/bert_wp_term_freq.msmarco-doc-segmented.20220411.pickle \
--alpha 0.3 \
--hits 10000 --segments \
--max-sim')

score_cmd = f'python -m pyserini.eval.trec_eval \
-c -m map -m ndcg_cut.10 -M 100 {topic} irst_test/regression_test_max.{topic}.txt'

status = os.system(score_cmd)
stdout, stderr = run_command(score_cmd)
map_score = parse_score(stdout, "map")
ndcg_score = parse_score(stdout, "ndcg")

self.assertEqual(status, 0)
self.assertEqual(stderr, '')
self.assertEqual(map_score, 0.2425)
self.assertEqual(ndcg_score, 0.5195)

def test_max_aggregation_dl20_doc_seg(self):
#dl20
topic = 'dl20-doc'
os.system(f'python -m pyserini.search.lucene.irst \
--topics {self.dl20} \
--translation-model irst_test/ibm_model_1_bert_tok_20211117/ \
--index msmarco-v1-doc-segmented \
--output irst_test/regression_test_max.{topic}.txt \
--wp-stat irst_test/bert_wp_term_freq.msmarco-doc-segmented.20220411.pickle \
--alpha 0.3 \
--hits 10000 --segments \
--max-sim')

score_cmd = f'python -m pyserini.eval.trec_eval \
-c -m map -m ndcg_cut.10 -M 100 {topic} irst_test/regression_test_max.{topic}.txt'

status = os.system(score_cmd)
stdout, stderr = run_command(score_cmd)
map_score = parse_score(stdout, "map")
ndcg_score = parse_score(stdout, "ndcg")

self.assertEqual(status, 0)
self.assertEqual(stderr, '')
self.assertEqual(map_score, 0.3496)
self.assertEqual(ndcg_score, 0.5089)

def tearDown(self):
rmtree('irst_test/')



if __name__ == '__main__':
unittest.main()

0 comments on commit 53333e0

Please sign in to comment.