From 49ea68ce629c5b56c04f38dfcbd2785836f49cd1 Mon Sep 17 00:00:00 2001
From: Golovneva <103262907+Golovneva@users.noreply.github.com>
Date: Fri, 4 Nov 2022 12:01:27 -0400
Subject: [PATCH] updating requirements (#4860)

* updating requirements

* lint
---
 projects/roscoe/README.md                     | 14 ++-
 .../roscoe/baselines/bart_requirements.txt    | 98 -------------------
 projects/roscoe/baselines/requirements.txt    |  6 +-
 projects/roscoe/baselines/run.py              |  2 +-
 projects/roscoe/baselines/scores.py           | 33 ++++---
 5 files changed, 28 insertions(+), 125 deletions(-)
 delete mode 100644 projects/roscoe/baselines/bart_requirements.txt

diff --git a/projects/roscoe/README.md b/projects/roscoe/README.md
index a9883a1a6e9..01fc6b060c8 100644
--- a/projects/roscoe/README.md
+++ b/projects/roscoe/README.md
@@ -83,18 +83,16 @@ bash projects/roscoe/roscoe_data/download_annotated.sh
 
 ### Baseline scoring
 One-time setup: 
-It is higly recommended to run baseline scoring from a separate conda environment.
-```bash
-conda create --name roscoe_baselines python=3.8
-conda activate roscoe_baselines
-```
+Follow BLEURT [installation guidelines](https://github.com/google-research/bleurt#installation).
+Clone the [BARTScore repo](https://github.com/neulab/BARTScore) and update the `BART_SCORE_REPO` path in projects/roscoe/baselines/scores.py. Install the BARTScore requirements.
+Download the fine-tuned [BART model](https://dl.fbaipublicfiles.com/parlai/projects/roscoe/fine_tuned_bartscore.pth).
+Follow PRISM [installation guidelines](https://github.com/thompsonb/prism) to clone the repo and download the model. Do not install the PRISM requirements.
 Install requirements to run baselines:
 ```bash
+python -c "import nltk; nltk.download('punkt')"
+python -c "import nltk; nltk.download('stopwords')"
 pip install -r projects/roscoe/baselines/requirements.txt
-pip install -r projects/roscoe/baselines/bart_requirements.txt
 ```
-Follow BLEURT [installation quidelines](https://github.com/google-research/bleurt#installation)
-Upload fine-tuned [BART model](https://dl.fbaipublicfiles.com/parlai/projects/roscoe/fine_tuned_bartscore.pth)
 
 Then you can run baselines on all datasets, scores, and use of reference (when possible) with the following:
 ```bash
diff --git a/projects/roscoe/baselines/bart_requirements.txt b/projects/roscoe/baselines/bart_requirements.txt
deleted file mode 100644
index 2605069dfc2..00000000000
--- a/projects/roscoe/baselines/bart_requirements.txt
+++ /dev/null
@@ -1,98 +0,0 @@
-absl-py==0.12.0
-antlr4-python3-runtime==4.8
-astunparse==1.6.3
-bert-score==0.3.9
-boto3==1.17.78
-botocore==1.20.78
-cached-property==1.5.2
-cachetools==4.2.2
-certifi==2020.12.5
-cffi==1.14.5
-chardet==4.0.0
-click==8.0.1
-cmake==3.20.2
-cycler==0.10.0
-Cython==0.29.23
-dataclasses==0.6
-docopt==0.6.2
-fairseq==0.9.0
-filelock==3.0.12
-flatbuffers==1.12
-fsspec==0.8.7
-future==0.18.2
-gast==0.3.3
-google-auth==1.30.0
-google-auth-oauthlib==0.4.4
-google-pasta==0.2.0
-grpcio==1.32.0
-h5py==2.10.0
-huggingface-hub==0.0.8
-hydra-core==1.0.6
-idna==2.10
-importlib-metadata==4.0.1
-importlib-resources==5.1.4
-jmespath==0.10.0
-joblib==1.0.1
-jsonlines==2.0.0
-keras-nightly==2.5.0.dev2021032900
-Keras-Preprocessing==1.1.2
-kiwisolver==1.3.1
-Markdown==3.3.4
-matplotlib==3.4.2
-mosestokenizer==1.1.0
-nltk==3.6.2
-numpy==1.18.5
-oauthlib==3.1.0
-omegaconf==2.0.6
-openfile==0.0.7
-opt-einsum==3.3.0
-packaging==20.9
-pandas==1.2.4
-Pillow==8.2.0
-portalocker==2.0.0
-protobuf==3.17.0
-pyasn1==0.4.8
-pyasn1-modules==0.2.8
-pycparser==2.20
-pyDeprecate==0.3.0
-pyemd==0.5.1
-pyparsing==2.4.7
-python-dateutil==2.8.1
-pytorch-lightning==1.3.0
-pytorch-nlp==0.5.0
-pytorch-pretrained-bert==0.6.2
-pytz==2021.1
-PyYAML==5.3.1
-regex==2021.4.4
-requests==2.25.1
-requests-oauthlib==1.3.0
-rsa==4.7.2
-s3transfer==0.4.2
-sacrebleu==1.5.1
-sacremoses==0.0.45
-scikit-learn==0.24.0
-scipy==1.9.2
-sentencepiece==0.1.97
-six==1.15.0
-tabulate==0.8.9
-tensorboard>=2.4.0
-tensorboard-data-server==0.6.1
-tensorboard-plugin-wit==1.8.0
-tensorflow>=2.3.0
-tensorflow-estimator>=2.3.0
-termcolor==1.1.0
-tf-slim==1.1.0
-threadpoolctl==2.1.0
-tokenizers==0.10.2
-toolwrapper==2.1.0
-torch>=1.6.0
-torchmetrics==0.3.2
-tqdm==4.60.0
-transformers==4.6.1
-typing-extensions==3.7.4.3
-uctools==1.3.0
-unbabel-comet>=0.1.0
-urllib3==1.26.4
-Werkzeug==2.0.1
-wrapt==1.12.1
-zipp==3.4.1
diff --git a/projects/roscoe/baselines/requirements.txt b/projects/roscoe/baselines/requirements.txt
index 99fd3e3d369..380b593def2 100644
--- a/projects/roscoe/baselines/requirements.txt
+++ b/projects/roscoe/baselines/requirements.txt
@@ -1,8 +1,6 @@
-rouge-score
-bert-score
-ctc_score
+rouge-score>=0.1.2
+bert-score>=0.3.9
 sentencepiece>=0.1.86
 fairseq==0.9.0
 sacrebleu>=1.4.8
-torch>=1.4.0
 ctc_score
\ No newline at end of file
diff --git a/projects/roscoe/baselines/run.py b/projects/roscoe/baselines/run.py
index a28f7432a67..8cf3ab0db7f 100644
--- a/projects/roscoe/baselines/run.py
+++ b/projects/roscoe/baselines/run.py
@@ -235,7 +235,7 @@ def save_scores_map(path, metric_to_score):
         type=str,
         choices=[x.value for x in UseRef],
         nargs="+",
-        default=[x.value for x in UseRef],
+        default=UseRef.NO.value,
         help='do we want to generate reference-based or reference-free scores',
     )
     parser.add_argument(
diff --git a/projects/roscoe/baselines/scores.py b/projects/roscoe/baselines/scores.py
index 8a2558a45f4..b0fca6a1a2e 100644
--- a/projects/roscoe/baselines/scores.py
+++ b/projects/roscoe/baselines/scores.py
@@ -26,8 +26,8 @@
 
 from importlib.machinery import SourceFileLoader
 
-BART_SCORE_REPO = "/path_to/BARTScore/"
-PRISM_SCORE_REPO = "/path_to/SUM"
+BART_SCORE_REPO = "/path_to/BARTScore"
+PRISM_SCORE_REPO = "/path_to/prism"
 BLEURT_SCORE_REPO = "/path_to/bleurt"
 
 ######### Base functionality
@@ -114,7 +114,9 @@ def get_scores(self, score_me):
 @register_scorer([BLEURT])
 class BleurtBaselineScorer(BaselineScorer):
     def __init__(self):
-        self.scorer = bleurt_score.BleurtScorer(BLEURT_SCORE_REPO + "/test_checkpoint")
+        self.scorer = bleurt_score.BleurtScorer(
+            BLEURT_SCORE_REPO + "/bleurt/test_checkpoint"
+        )
 
     def get_scores(self, score_me):
         scores = self.scorer.score(
@@ -143,8 +145,6 @@ def get_scores(self, score_me):
 
 
 ######### BartScore (and its variants)
-# Note: You might have to load some of its dependencies yourself.
-# HuggingFace is the major one
 # Second argument here should be path to `bart_score.py` of the BARTScore repo
 try:
     bart_score = SourceFileLoader(
@@ -190,8 +190,12 @@ def __init__(self):
         self.scorer = BARTScorer(
             device=DEFAULT_DEVICE, checkpoint='facebook/bart-large-cnn'
         )
-        # Path here should be to fine tuned BART model from https://github.com/neulab/BARTScore#direct-use
-        self.scorer.load(BART_SCORE_REPO + "/bart_score_para_finetuned.pth")
+        try:
+            self.scorer.load(BART_SCORE_REPO + "/bart_score_para_finetuned.pth")
+        except FileNotFoundError:
+            raise FileNotFoundError(
+                f"Path here should be to fine tuned BART model from https://github.com/neulab/BARTScore#direct-use"
+            )
         self.score_type = BARTSCORE_CNN_PARA_F
 
 
@@ -218,16 +222,17 @@ def load(self, path=None):
             device=DEFAULT_DEVICE, checkpoint='facebook/bart-large-cnn'
         )
         # Path here to fine-tuend BART Model
-        self.scorer.load(BART_SCORE_REPO + "/train/reproduce/trained/bart_6000.pth")
+        try:
+            self.scorer.load(BART_SCORE_REPO + "/train/reproduce/trained/bart_6000.pth")
+        except FileNotFoundError:
+            raise FileNotFoundError(
+                f"Path here should be to fine tuned BART model from"
+                + "https://dl.fbaipublicfiles.com/parlai/projects/roscoe/fine_tuned_bartscore.pth"
+            )
         self.score_type = BARTSCORE_FINETUNED_F
 
 
 ######### Prism
-# Prism deps (minimal set)
-# sentencepiece>=0.1.86
-# fairseq==0.9.0
-# sacrebleu>=1.4.8#
-# torch>=1.4.0
 prism = SourceFileLoader("prism", PRISM_SCORE_REPO + "/prism.py").load_module()
 
 
@@ -235,7 +240,7 @@ def load(self, path=None):
 class PrismBaselineScorer(BaselineScorer):
     def __init__(self):
         self.scorer = prism.Prism(
-            model_dir=PRISM_SCORE_REPO + '/models/m39v1/',
+            model_dir=PRISM_SCORE_REPO + '/m39v1/',
             lang='en',
         )