This repository has been archived by the owner on Nov 3, 2023. It is now read-only.

Merge remote-tracking branch 'origin/master' into add-text-tr-stats
dexterju27 committed Mar 8, 2021
2 parents 767ad3d + 540cb36 commit 3c211a1
Showing 6 changed files with 222 additions and 144 deletions.
48 changes: 36 additions & 12 deletions CONTRIBUTING.md
@@ -4,22 +4,46 @@ While we are seeding this project with an initial set of popular tasks and a few
models and examples, ongoing contributions from the research community are
desired to increase the pool of tasks, models, and baselines.


## Pull Requests
We actively welcome your pull requests.

1. Fork the repo and create your branch from `master`. Set up your environment
and run `pre-commit install` once.
2. Link [CircleCI](https://circleci.com/vcs-authorize/) to your github account
if you haven't done so previously (and make sure the CircleCI tests run
successfully).
3. If you've added code that should be tested, [add tests](http://parl.ai/docs/tutorial_tests.html).
4. If you've changed APIs, update the documentation.
5. Autoformat and lint your code (`bash autoformat.sh`)
6. Ensure the test suite passes. Run `python -m pytest -m unit`.
7. If you've added a new dataset, you should also run
1. Fork the repo and then clone the forked repository. (See this [github guide](https://guides.github.com/activities/forking/) on forking for more info).
**If you have already cloned the repo directly and committed changes, follow the steps in the [section below](#moving-changes-youve-committed-to-a-fork)**
2. Create your branch from `master`. Set up your environment
and run `pre-commit install` once.
3. Make your changes
4. If you've added code that should be tested, [add tests](http://parl.ai/docs/tutorial_tests.html).
5. If you've changed APIs, update the documentation.
6. Autoformat and lint your code (`bash autoformat.sh`)
7. Ensure the test suite passes. Run `python -m pytest -m unit`.
8. If you've added a new dataset, you should also run
`python -m pytest -m data`. Copy-paste the output into a comment in your PR.
8. If you haven't already, complete the Contributor License Agreement ("CLA").
9. Once the PR is accepted and CI is passing, we will merge the PR for you.
9. If you haven't already, complete the Contributor License Agreement ("CLA").
10. Link [CircleCI](https://circleci.com/vcs-authorize/) to your github account
if you haven't done so previously (and make sure the CircleCI tests run
successfully on the PR after you push your changes).
11. Push your changes!
12. Once the PR is accepted and CI is passing, we will merge the PR for you.

### Moving changes you've committed to a fork
1. Fork the repo
2. In your local repo, rename your origin remote to upstream
```
git remote rename origin upstream
```
3. Point origin to the forked repo (instead of to the original repo)
```
git remote add origin git@github...<FORK>
```
4. Fetch from the new origin
```
git fetch origin
```
5. Make your local branch track the remote branch (of the forked repo)
```
git branch --set-upstream-to origin/master master
```

## Contributor License Agreement ("CLA")
In order to accept your pull request, we need you to submit a CLA. You only need
17 changes: 12 additions & 5 deletions docs/source/generate_task_list.py
@@ -7,18 +7,25 @@

MASTER = "https://github.com/facebookresearch/ParlAI/tree/master"

category_order = ['QA', 'Cloze', 'Goal', 'ChitChat', 'Negotiation', 'Visual', 'decanlp']
category_task_list = {x: [] for x in category_order}
categories = set()
for task_dict in task_list:
categories.update(task_dict.get('tags', []))
categories = sorted(categories)
category_task_list = {x: [] for x in categories}

fout = open('task_list.inc', 'w')

s = "They consist of: "
for t in category_order:
for t in categories:
fout.write(f"1. {t} tasks\n")
fout.write("\n")

for task_dict in task_list:
tags = task_dict.get('tags', None)
tags = task_dict.get('tags', [])
if not tags:
if 'Uncategorized' not in category_task_list:
category_task_list['Uncategorized'] = []
category_task_list['Uncategorized'].append(task_dict)
for tag in tags:
if tag in category_task_list:
category_task_list[tag].append(task_dict)
@@ -44,7 +51,7 @@
urls.append(("code", code_url))

urls_md = ", ".join(f"[{k}]({v})" for k, v in urls)
fout.write(f"### {display_name}\n")
fout.write(f"### {display_name.title().replace('_', ' ')}\n")
fout.write(f"_Usage_: `--task {task}`\n\n")
fout.write(f"_Links_: {urls_md}\n\n")
if description:
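The hunks above replace the hard-coded `category_order` with categories derived from each task's tags, plus an `Uncategorized` fallback for untagged tasks. A minimal sketch of that logic, using a made-up `task_list` (the real script reads task metadata from ParlAI):

```python
# Stand-in task metadata; the real script imports task_list from ParlAI.
task_list = [
    {'id': 'squad', 'tags': ['QA']},
    {'id': 'wizard', 'tags': ['ChitChat', 'QA']},
    {'id': 'mystery', 'tags': []},  # no tags -> goes to Uncategorized
]

# Derive the category set from the data instead of a fixed category_order.
categories = sorted({tag for task in task_list for tag in task.get('tags', [])})
category_task_list = {c: [] for c in categories}

for task in task_list:
    tags = task.get('tags', [])
    if not tags:
        # Untagged tasks fall back to an Uncategorized bucket
        # rather than being silently dropped.
        category_task_list.setdefault('Uncategorized', []).append(task)
    for tag in tags:
        if tag in category_task_list:
            category_task_list[tag].append(task)
```

This keeps the generated task list complete even when a new tag (or no tag at all) appears, at the cost of losing the old hand-curated category ordering.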
44 changes: 43 additions & 1 deletion docs/source/tutorial_metrics.md
@@ -1,9 +1,13 @@
# Understanding and adding new metrics
# Understanding and adding metrics

Author: Stephen Roller

## Introduction and Standard Metrics

:::{tip} List of metrics
If you're not sure what a metric means, refer to our [List of metrics](#list-of-metrics).
:::

ParlAI contains a number of built-in metrics that are automatically computed when
we train and evaluate models. Some of these metrics are _text generation_ metrics,
which happen any time we generate a text: this includes F1, BLEU and Accuracy.
@@ -53,6 +57,7 @@ One nice thing about metrics is that they are automatically logged to the
statements into your code.



### Agent-specific metrics

Some agents include their own metrics that are computed for them. For example,
@@ -402,3 +407,40 @@ __Under the hood__: Local metrics work by including a "metrics" field in the
return message. This is a dictionary which maps field name to a metric value.
When the teacher receives the response from the model, it utilizes the metrics
field to update counters on its side.
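The mechanism described above can be sketched in a few lines. This is an illustrative stand-in, not ParlAI's actual agent classes: a model attaches a `"metrics"` dict to its reply, and the teacher folds those values into running counters.

```python
from collections import defaultdict

def model_respond(observation):
    """Toy model agent that reports a local metric in its reply."""
    reply = {'text': 'hello world'}
    # Local metric: a per-example value computed on the model side.
    reply['metrics'] = {'my_metric': len(reply['text'].split())}
    return reply

class TeacherCounters:
    """Toy teacher side: consumes the metrics field to update counters."""

    def __init__(self):
        self.totals = defaultdict(float)
        self.counts = defaultdict(int)

    def observe(self, response):
        for name, value in response.get('metrics', {}).items():
            self.totals[name] += value
            self.counts[name] += 1

    def report(self):
        # Average each metric over the examples seen so far.
        return {k: self.totals[k] / self.counts[k] for k in self.totals}

teacher = TeacherCounters()
teacher.observe(model_respond({'text': 'hi'}))
```

The names `model_respond`, `TeacherCounters`, and `my_metric` are hypothetical; only the shape of the `"metrics"` field matches the protocol described above.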

## List of Metrics

Below is a list of metrics and a brief explanation of each.

:::{note} List of metrics
If you find a metric not listed here,
please [file an issue on GitHub](https://github.com/facebookresearch/ParlAI/issues/new?assignees=&labels=Docs,Metrics&template=other.md).
:::

| Metric | Explanation |
| ----------------------- | ------------ |
| `accuracy` | Exact match text accuracy |
| `bleu-4` | BLEU-4 of the generation, under a standardized (model-independent) tokenizer |
| `clip` | Fraction of batches with clipped gradients |
| `ctpb` | Context tokens per batch |
| `ctps` | Context tokens per second |
| `exps` | Examples per second |
| `exs` | Number of examples processed since last print |
| `f1` | Unigram F1 overlap, under a standardized (model-independent) tokenizer |
| `gnorm` | Gradient norm |
| `gpu_mem` | Fraction of GPU memory used. May slightly underestimate true value. |
| `hits@1`, `hits@5`, ... | Fraction of correct choices in K guesses. (Similar to recall@K) |
| `interdistinct-1`, `interdistinct-2` | Fraction of n-grams unique across _all_ generations |
| `intradistinct-1`, `intradistinct-2` | Fraction of n-grams unique _within_ each utterance |
| `jga` | Joint Goal Accuracy |
| `loss` | Loss |
| `lr` | The most recent learning rate applied |
| `ltpb` | Label tokens per batch |
| `ltps` | Label tokens per second |
| `rouge-1`, `rouge-2`, `rouge-L` | ROUGE metrics |
| `token_acc` | Token-wise accuracy (generative only) |
| `token_em` | Utterance-level token accuracy. Roughly corresponds to perfection under greedy search (generative only) |
| `total_train_updates` | Number of SGD steps taken across all batches |
| `tpb` | Total tokens (context + label) per batch |
| `tps` | Total tokens (context + label) per second |
| `ups` | Updates per second (approximate) |
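As an illustration of the `f1` row above, here is a sketch of a unigram F1 computation. It uses plain whitespace tokenization for simplicity; ParlAI's actual metric applies its own standardized normalization and tokenization first, so exact values will differ.

```python
from collections import Counter

def unigram_f1(prediction: str, reference: str) -> float:
    """Harmonic mean of unigram precision and recall (illustrative only)."""
    pred_tokens = prediction.lower().split()
    ref_tokens = reference.lower().split()
    # Multiset intersection counts each shared token at most
    # min(count_in_pred, count_in_ref) times.
    common = Counter(pred_tokens) & Counter(ref_tokens)
    overlap = sum(common.values())
    if overlap == 0:
        return 0.0
    precision = overlap / len(pred_tokens)
    recall = overlap / len(ref_tokens)
    return 2 * precision * recall / (precision + recall)
```

For example, `unigram_f1('the cat sat', 'the cat ran')` shares two of three tokens on each side, giving precision = recall = 2/3 and F1 = 2/3.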
3 changes: 2 additions & 1 deletion parlai/tasks/dialogue_nli/agents.py
@@ -13,6 +13,7 @@
import json
import os

from parlai.core.message import Message
from parlai.core.teachers import FixedDialogTeacher
from .build import build
from parlai.tasks.multinli.agents import convert_to_dialogData
@@ -134,7 +135,7 @@ def get(self, episode_idx, entry_idx=0):
binary_classes=self.binary_classes,
)
new_entry = {k: entry[k] for k in ENTRY_FIELDS if k in entry}
return new_entry
return Message(new_entry)


class ExtrasTeacher(DialogueNliTeacher):
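The diff above wraps the returned dict in `parlai.core.message.Message`. As I understand it, `Message` is a dict subclass that guards fields against accidental in-place mutation, requiring an explicit `force_set` to override. A toy stand-in (not ParlAI's real implementation) mimics that idea:

```python
# Toy version of the Message pattern; ParlAI's actual class lives in
# parlai.core.message and may differ in details.
class Message(dict):
    def __setitem__(self, key, value):
        if key in self:
            raise RuntimeError(
                f'Message already has field {key!r}; use force_set to override.'
            )
        super().__setitem__(key, value)

    def force_set(self, key, value):
        super().__setitem__(key, value)

entry = Message({'text': 'premise', 'labels': ['entailment'], 'episode_done': True})
entry['extra'] = 'ok'          # adding a new field is fine
try:
    entry['text'] = 'mutated'  # overwriting an existing field raises
except RuntimeError:
    overwrite_blocked = True
```

Returning `Message` instead of a bare dict means downstream agents cannot silently clobber teacher-provided fields such as `labels`.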
30 changes: 17 additions & 13 deletions parlai/tasks/empathetic_dialogues/agents.py
@@ -13,6 +13,7 @@
import numpy as np

from parlai.utils.io import PathManager
from parlai.core.message import Message
from parlai.core.teachers import FixedDialogTeacher
from .build import build

@@ -220,18 +221,21 @@ def get(self, episode_idx, entry_idx=0):
ep = self.data[episode_idx]
ep_i = ep[entry_idx]
episode_done = entry_idx >= (len(ep) - 1)
action = {
'situation': ep_i[3],
'emotion': ep_i[2],
'text': ep_i[0],
'labels': [ep_i[1]],
'prepend_ctx': ep_i[6],
'prepend_cand': ep_i[7],
'deepmoji_ctx': ep_i[4],
'deepmoji_cand': ep_i[5],
'episode_done': episode_done,
'label_candidates': ep_i[8],
}
action = Message(
{
'situation': ep_i[3],
'emotion': ep_i[2],
'text': ep_i[0],
'labels': [ep_i[1]],
'prepend_ctx': ep_i[6],
'prepend_cand': ep_i[7],
'deepmoji_ctx': ep_i[4],
'deepmoji_cand': ep_i[5],
'episode_done': episode_done,
'label_candidates': ep_i[8],
}
)

return action

def share(self):
@@ -268,7 +272,7 @@ def get(self, episode_idx, entry_idx=0):
ex = self.data[episode_idx]
episode_done = True

return {'labels': [ex[2]], 'text': ex[3], 'episode_done': episode_done}
return Message({'labels': [ex[2]], 'text': ex[3], 'episode_done': episode_done})


class DefaultTeacher(EmpatheticDialoguesTeacher):
