From 2adbf1bedcfbfbeb3a5fbad71fad95feaab2b641 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xueguang=20Ma=20=E9=A9=AC=E9=9B=AA=E5=85=89?= Date: Wed, 21 Apr 2021 19:53:12 -0400 Subject: [PATCH] Fix windows error (#492) * windows subprocess error * using python rather than python3, we assume python is linked to python3 --- integrations/run_simplesearcher.py | 2 +- integrations/simplesearcher_checker.py | 2 +- pyserini/eval/msmarco_doc_eval.py | 7 +++++-- pyserini/eval/msmarco_passage_eval.py | 7 +++++-- pyserini/eval/trec_eval.py | 5 ++++- tests/test_tokenize_json.py | 22 +++++++++++----------- 6 files changed, 27 insertions(+), 18 deletions(-) diff --git a/integrations/run_simplesearcher.py b/integrations/run_simplesearcher.py index db0591230..d4b3478a3 100644 --- a/integrations/run_simplesearcher.py +++ b/integrations/run_simplesearcher.py @@ -24,7 +24,7 @@ class RunSimpleSearcher: def __init__(self, index: str, topics: str): self.index_path = index self.topics = topics - self.pyserini_base_cmd = 'python3 -m pyserini.search' + self.pyserini_base_cmd = 'python -m pyserini.search' @staticmethod def _cleanup(files: List[str]): diff --git a/integrations/simplesearcher_checker.py b/integrations/simplesearcher_checker.py index 889bd2a3d..5698c9413 100644 --- a/integrations/simplesearcher_checker.py +++ b/integrations/simplesearcher_checker.py @@ -29,7 +29,7 @@ def __init__(self, anserini_root: str, index: str, topics: str, pyserini_topics: self.anserini_base_cmd = os.path.join(self.anserini_root, 'target/appassembler/bin/SearchCollection -topicreader Trec') - self.pyserini_base_cmd = 'python3 -m pyserini.search' + self.pyserini_base_cmd = 'python -m pyserini.search' self.eval_base_cmd = 'tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30' diff --git a/pyserini/eval/msmarco_doc_eval.py b/pyserini/eval/msmarco_doc_eval.py index 83ad0c467..38309c823 100644 --- a/pyserini/eval/msmarco_doc_eval.py +++ b/pyserini/eval/msmarco_doc_eval.py @@ -1,12 +1,13 @@ import os import subprocess import sys +import platform from pyserini.search import get_qrels_file from pyserini.util import download_evaluation_script script_path = download_evaluation_script('msmarco_doc_eval') -cmd_prefix = ['python3', script_path] +cmd_prefix = ['python', script_path] args = sys.argv if len(args) > 1: cmd = cmd_prefix + args[1:] @@ -17,9 +18,11 @@ else: cmd = cmd_prefix print(f'Running command: {cmd}') +shell = platform.system() == "Windows" process = subprocess.Popen(cmd, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + stderr=subprocess.PIPE, + shell=shell) stdout, stderr = process.communicate() if stderr: print(stderr.decode("utf-8")) diff --git a/pyserini/eval/msmarco_passage_eval.py b/pyserini/eval/msmarco_passage_eval.py index 90e7e6efe..50bce6938 100644 --- a/pyserini/eval/msmarco_passage_eval.py +++ b/pyserini/eval/msmarco_passage_eval.py @@ -1,12 +1,13 @@ import os import subprocess import sys +import platform from pyserini.search import get_qrels_file from pyserini.util import download_evaluation_script script_path = download_evaluation_script('msmarco_passage_eval') -cmd_prefix = ['python3', script_path] +cmd_prefix = ['python', script_path] args = sys.argv if len(args) > 1: cmd = cmd_prefix + args[1:] @@ -15,9 +16,11 @@ else: cmd = cmd_prefix print(f'Running command: {cmd}') +shell = platform.system() == "Windows" process = subprocess.Popen(cmd, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + stderr=subprocess.PIPE, + shell=shell) stdout, stderr = process.communicate() if stderr: print(stderr.decode("utf-8")) diff --git a/pyserini/eval/trec_eval.py b/pyserini/eval/trec_eval.py index c431e2800..5974d191a 100644 --- a/pyserini/eval/trec_eval.py +++ b/pyserini/eval/trec_eval.py @@ -1,6 +1,7 @@ import os import subprocess import sys +import platform from pyserini.search import get_qrels_file from pyserini.util import download_evaluation_script @@ -15,9 +16,11 @@ else: cmd = cmd_prefix print(f'Running command: {cmd}') +shell = platform.system() == "Windows" process = subprocess.Popen(cmd, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + stderr=subprocess.PIPE, + shell=shell) stdout, stderr = process.communicate() if stderr: print(stderr.decode("utf-8")) diff --git a/tests/test_tokenize_json.py b/tests/test_tokenize_json.py index d3d6f046d..96b4e1b23 100644 --- a/tests/test_tokenize_json.py +++ b/tests/test_tokenize_json.py @@ -24,15 +24,15 @@ class TestTokenizeJson(unittest.TestCase): def test_bert_single_file(self): inj = 'test_bert_single_file.json' - outj='out_test_bert_single_file.json' + outj = 'out_test_bert_single_file.json' f = open(inj, 'w') f.write('{"id": "doc1","contents": "I have a new gpu!"}\n{"id": "doc2","contents": "I do have an old gpu!"}') f.close() if(os.getcwd().endswith('tests')): - os.system(f'python3 ../pyserini/tokenize_json_collection.py --input {inj} --output {outj}') + os.system(f'python ../pyserini/tokenize_json_collection.py --input {inj} --output {outj}') else: - os.system(f'python3 pyserini/tokenize_json_collection.py --input {inj} --output {outj}') - with open(outj,'r') as ret: + os.system(f'python pyserini/tokenize_json_collection.py --input {inj} --output {outj}') + with open(outj, 'r') as ret: for i, line in enumerate(ret): contents = json.loads(line)['contents'] if (i == 0): @@ -49,24 +49,24 @@ def test_bert_dir(self): if(os.path.isdir(indir)): rmtree(indir) os.mkdir(indir) - f1 = open(indir+'/doc00.json','w') + f1 = open(indir+'/doc00.json', 'w') f1.write('{"id": "doc1","contents": "I have a new gpu!"}\n{"id": "doc2","contents": "I do have an old gpu!"}') f1.close() - f2 = open(indir+'/doc01.json','w') + f2 = open(indir+'/doc01.json', 'w') f2.write('{"id": "doc1","contents": "A new gpu!"}\n{"id": "doc2","contents": "An old gpu!"}') f2.close() if (os.getcwd().endswith('tests')): - os.system(f'python3 ../pyserini/tokenize_json_collection.py --input {indir} --output {outdir}') + os.system(f'python ../pyserini/tokenize_json_collection.py --input {indir} --output {outdir}') else: - os.system(f'python3 pyserini/tokenize_json_collection.py --input {indir} --output {outdir}') - with open(outdir+'/docs00.json','r') as ret: + os.system(f'python pyserini/tokenize_json_collection.py --input {indir} --output {outdir}') + with open(outdir+'/docs00.json', 'r') as ret: for i, line in enumerate(ret): contents = json.loads(line)['contents'] if (i == 0): self.assertEqual('i have a new gp ##u !', contents) else: self.assertEqual('i do have an old gp ##u !', contents) - with open(outdir+'/docs01.json','r') as ret: + with open(outdir+'/docs01.json', 'r') as ret: for i, line in enumerate(ret): contents = json.loads(line)['contents'] if (i == 0): @@ -78,4 +78,4 @@ def test_bert_dir(self): if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main()