From 49ea68ce629c5b56c04f38dfcbd2785836f49cd1 Mon Sep 17 00:00:00 2001 From: Golovneva <103262907+Golovneva@users.noreply.github.com> Date: Fri, 4 Nov 2022 12:01:27 -0400 Subject: [PATCH] updating requirements (#4860) * updating requirements * lint --- projects/roscoe/README.md | 14 ++- .../roscoe/baselines/bart_requirements.txt | 98 ------------------- projects/roscoe/baselines/requirements.txt | 6 +- projects/roscoe/baselines/run.py | 2 +- projects/roscoe/baselines/scores.py | 33 ++++--- 5 files changed, 28 insertions(+), 125 deletions(-) delete mode 100644 projects/roscoe/baselines/bart_requirements.txt diff --git a/projects/roscoe/README.md b/projects/roscoe/README.md index a9883a1a6e9..01fc6b060c8 100644 --- a/projects/roscoe/README.md +++ b/projects/roscoe/README.md @@ -83,18 +83,16 @@ bash projects/roscoe/roscoe_data/download_annotated.sh ### Baseline scoring One-time setup: -It is higly recommended to run baseline scoring from a separate conda environment. -```bash -conda create --name roscoe_baselines python=3.8 -conda activate roscoe_baselines -``` +Follow BLEURT [installation guidelines](https://github.com/google-research/bleurt#installation). +Clone [BartScore repo](https://github.com/neulab/BARTScore) and update path in projects/roscoe/baselines/scores.py. Install Bart_score requirements. +Download the fine-tuned [BART model](https://dl.fbaipublicfiles.com/parlai/projects/roscoe/fine_tuned_bartscore.pth). +Follow PRISM [installation guidelines](https://github.com/thompsonb/prism) and download the model. Do not install requirements. 
Install requirements to run baselines: ```bash +python -c "import nltk; nltk.download('punkt')" +python -c "import nltk; nltk.download('stopwords')" pip install -r projects/roscoe/baselines/requirements.txt -pip install -r projects/roscoe/baselines/bart_requirements.txt ``` -Follow BLEURT [installation quidelines](https://github.com/google-research/bleurt#installation) -Upload fine-tuned [BART model](https://dl.fbaipublicfiles.com/parlai/projects/roscoe/fine_tuned_bartscore.pth) Then you can run baselines on all datasets, scores, and use of reference (when possible) with the following: ```bash diff --git a/projects/roscoe/baselines/bart_requirements.txt b/projects/roscoe/baselines/bart_requirements.txt deleted file mode 100644 index 2605069dfc2..00000000000 --- a/projects/roscoe/baselines/bart_requirements.txt +++ /dev/null @@ -1,98 +0,0 @@ -absl-py==0.12.0 -antlr4-python3-runtime==4.8 -astunparse==1.6.3 -bert-score==0.3.9 -boto3==1.17.78 -botocore==1.20.78 -cached-property==1.5.2 -cachetools==4.2.2 -certifi==2020.12.5 -cffi==1.14.5 -chardet==4.0.0 -click==8.0.1 -cmake==3.20.2 -cycler==0.10.0 -Cython==0.29.23 -dataclasses==0.6 -docopt==0.6.2 -fairseq==0.9.0 -filelock==3.0.12 -flatbuffers==1.12 -fsspec==0.8.7 -future==0.18.2 -gast==0.3.3 -google-auth==1.30.0 -google-auth-oauthlib==0.4.4 -google-pasta==0.2.0 -grpcio==1.32.0 -h5py==2.10.0 -huggingface-hub==0.0.8 -hydra-core==1.0.6 -idna==2.10 -importlib-metadata==4.0.1 -importlib-resources==5.1.4 -jmespath==0.10.0 -joblib==1.0.1 -jsonlines==2.0.0 -keras-nightly==2.5.0.dev2021032900 -Keras-Preprocessing==1.1.2 -kiwisolver==1.3.1 -Markdown==3.3.4 -matplotlib==3.4.2 -mosestokenizer==1.1.0 -nltk==3.6.2 -numpy==1.18.5 -oauthlib==3.1.0 -omegaconf==2.0.6 -openfile==0.0.7 -opt-einsum==3.3.0 -packaging==20.9 -pandas==1.2.4 -Pillow==8.2.0 -portalocker==2.0.0 -protobuf==3.17.0 -pyasn1==0.4.8 -pyasn1-modules==0.2.8 -pycparser==2.20 -pyDeprecate==0.3.0 -pyemd==0.5.1 -pyparsing==2.4.7 -python-dateutil==2.8.1 
-pytorch-lightning==1.3.0 -pytorch-nlp==0.5.0 -pytorch-pretrained-bert==0.6.2 -pytz==2021.1 -PyYAML==5.3.1 -regex==2021.4.4 -requests==2.25.1 -requests-oauthlib==1.3.0 -rsa==4.7.2 -s3transfer==0.4.2 -sacrebleu==1.5.1 -sacremoses==0.0.45 -scikit-learn==0.24.0 -scipy==1.9.2 -sentencepiece==0.1.97 -six==1.15.0 -tabulate==0.8.9 -tensorboard>=2.4.0 -tensorboard-data-server==0.6.1 -tensorboard-plugin-wit==1.8.0 -tensorflow>=2.3.0 -tensorflow-estimator>=2.3.0 -termcolor==1.1.0 -tf-slim==1.1.0 -threadpoolctl==2.1.0 -tokenizers==0.10.2 -toolwrapper==2.1.0 -torch>=1.6.0 -torchmetrics==0.3.2 -tqdm==4.60.0 -transformers==4.6.1 -typing-extensions==3.7.4.3 -uctools==1.3.0 -unbabel-comet>=0.1.0 -urllib3==1.26.4 -Werkzeug==2.0.1 -wrapt==1.12.1 -zipp==3.4.1 diff --git a/projects/roscoe/baselines/requirements.txt b/projects/roscoe/baselines/requirements.txt index 99fd3e3d369..380b593def2 100644 --- a/projects/roscoe/baselines/requirements.txt +++ b/projects/roscoe/baselines/requirements.txt @@ -1,8 +1,6 @@ -rouge-score -bert-score -ctc_score +rouge-score>=0.1.2 +bert-score>=0.3.9 sentencepiece>=0.1.86 fairseq==0.9.0 sacrebleu>=1.4.8 -torch>=1.4.0 ctc_score \ No newline at end of file diff --git a/projects/roscoe/baselines/run.py b/projects/roscoe/baselines/run.py index a28f7432a67..8cf3ab0db7f 100644 --- a/projects/roscoe/baselines/run.py +++ b/projects/roscoe/baselines/run.py @@ -235,7 +235,7 @@ def save_scores_map(path, metric_to_score): type=str, choices=[x.value for x in UseRef], nargs="+", - default=[x.value for x in UseRef], + default=UseRef.NO.value, help='do we want to generate reference-based or reference-free scores', ) parser.add_argument( diff --git a/projects/roscoe/baselines/scores.py b/projects/roscoe/baselines/scores.py index 8a2558a45f4..b0fca6a1a2e 100644 --- a/projects/roscoe/baselines/scores.py +++ b/projects/roscoe/baselines/scores.py @@ -26,8 +26,8 @@ from importlib.machinery import SourceFileLoader -BART_SCORE_REPO = "/path_to/BARTScore/" -PRISM_SCORE_REPO = 
"/path_to/SUM" +BART_SCORE_REPO = "/path_to/BARTScore" +PRISM_SCORE_REPO = "/path_to/prism" BLEURT_SCORE_REPO = "/path_to/bleurt" ######### Base functionality @@ -114,7 +114,9 @@ def get_scores(self, score_me): @register_scorer([BLEURT]) class BleurtBaselineScorer(BaselineScorer): def __init__(self): - self.scorer = bleurt_score.BleurtScorer(BLEURT_SCORE_REPO + "/test_checkpoint") + self.scorer = bleurt_score.BleurtScorer( + BLEURT_SCORE_REPO + "/bleurt/test_checkpoint" + ) def get_scores(self, score_me): scores = self.scorer.score( @@ -143,8 +145,6 @@ def get_scores(self, score_me): ######### BartScore (and its variants) -# Note: You might have to load some of its dependencies yourself. -# HuggingFace is the major one # Second argument here should be path to `bart_score.py` of the BARTScore repo try: bart_score = SourceFileLoader( @@ -190,8 +190,12 @@ def __init__(self): self.scorer = BARTScorer( device=DEFAULT_DEVICE, checkpoint='facebook/bart-large-cnn' ) - # Path here should be to fine tuned BART model from https://github.com/neulab/BARTScore#direct-use - self.scorer.load(BART_SCORE_REPO + "/bart_score_para_finetuned.pth") + try: + self.scorer.load(BART_SCORE_REPO + "/bart_score_para_finetuned.pth") + except FileNotFoundError: + raise FileNotFoundError( + f"Path here should be to fine tuned BART model from https://github.com/neulab/BARTScore#direct-use" + ) self.score_type = BARTSCORE_CNN_PARA_F @@ -218,16 +222,17 @@ def load(self, path=None): device=DEFAULT_DEVICE, checkpoint='facebook/bart-large-cnn' ) # Path here to fine-tuend BART Model - self.scorer.load(BART_SCORE_REPO + "/train/reproduce/trained/bart_6000.pth") + try: + self.scorer.load(BART_SCORE_REPO + "/train/reproduce/trained/bart_6000.pth") + except FileNotFoundError: + raise FileNotFoundError( + f"Path here should be to fine tuned BART model from" + + "https://dl.fbaipublicfiles.com/parlai/projects/roscoe/fine_tuned_bartscore.pth" + ) self.score_type = BARTSCORE_FINETUNED_F ######### Prism -# Prism 
deps (minimal set) -# sentencepiece>=0.1.86 -# fairseq==0.9.0 -# sacrebleu>=1.4.8# -# torch>=1.4.0 prism = SourceFileLoader("prism", PRISM_SCORE_REPO + "/prism.py").load_module() @@ -235,7 +240,7 @@ def load(self, path=None): class PrismBaselineScorer(BaselineScorer): def __init__(self): self.scorer = prism.Prism( - model_dir=PRISM_SCORE_REPO + '/models/m39v1/', + model_dir=PRISM_SCORE_REPO + '/m39v1/', lang='en', )