From 43c22b18bc99af07da1ecb8c9590d934a93e1d23 Mon Sep 17 00:00:00 2001
From: SeanNaren <snarenthiran@nvidia.com>
Date: Mon, 9 Jan 2023 13:23:02 +0000
Subject: [PATCH 1/2] Add EMA docs, fix docs due to incorrect import, fix doc
 format for common collection

Signed-off-by: SeanNaren <snarenthiran@nvidia.com>
---
 docs/source/common/callbacks.rst         | 56 ++++++++++++++++++++++++
 docs/source/common/intro.rst             | 39 +++--------------
 docs/source/common/losses.rst            | 16 +++++++
 docs/source/common/metrics.rst           |  7 +++
 docs/source/common/tokenizers.rst        |  8 ++++
 nemo/collections/common/callbacks/ema.py |  2 +-
 requirements/requirements_docs.txt       |  1 +
 7 files changed, 95 insertions(+), 34 deletions(-)
 create mode 100644 docs/source/common/callbacks.rst
 create mode 100644 docs/source/common/losses.rst
 create mode 100644 docs/source/common/metrics.rst
 create mode 100644 docs/source/common/tokenizers.rst

diff --git a/docs/source/common/callbacks.rst b/docs/source/common/callbacks.rst
new file mode 100644
index 000000000000..81533488431d
--- /dev/null
+++ b/docs/source/common/callbacks.rst
@@ -0,0 +1,56 @@
+*********
+Callbacks
+*********
+
+Exponential Moving Average (EMA)
+================================
+
+During training, EMA maintains a moving average of the trained parameters.
+EMA parameters can produce significantly better results and faster convergence across a variety of domains and models.
+
+EMA is a simple calculation. The EMA weights are initialized with the model weights at the start of training.
+
+After every training update, the EMA weights are updated based on the new model weights.
+
+.. math::
+    \text{ema}_w = \text{ema}_w \cdot \text{decay} + \text{model}_w \cdot (1 - \text{decay})
+
+Enabling EMA is straightforward. Pass the following additional argument to the experiment manager at runtime.
+
+.. code-block:: bash
+
+    python examples/asr/asr_ctc/speech_to_text_ctc.py \
+        model.train_ds.manifest_filepath=/path/to/my/train/manifest.json \
+        model.validation_ds.manifest_filepath=/path/to/my/validation/manifest.json \
+        trainer.devices=2 \
+        trainer.accelerator='gpu' \
+        trainer.max_epochs=50 \
+        exp_manager.ema.enable=True # pass this additional argument to enable EMA
+
+To change the decay rate, pass the additional argument.
+
+.. code-block:: bash
+
+    python examples/asr/asr_ctc/speech_to_text_ctc.py \
+        model.train_ds.manifest_filepath=/path/to/my/train/manifest.json \
+        model.validation_ds.manifest_filepath=/path/to/my/validation/manifest.json \
+        trainer.devices=2 \
+        trainer.accelerator='gpu' \
+        trainer.max_epochs=50 \
+        exp_manager.ema.enable=True \
+        exp_manager.ema.decay=0.999
+
+We also offer other helpful arguments.
+
+.. code-block:: bash
+
+    python examples/asr/asr_ctc/speech_to_text_ctc.py \
+        model.train_ds.manifest_filepath=/path/to/my/train/manifest.json \
+        model.validation_ds.manifest_filepath=/path/to/my/validation/manifest.json \
+        trainer.devices=2 \
+        trainer.accelerator='gpu' \
+        trainer.max_epochs=50 \
+        exp_manager.ema.enable=True \
+        exp_manager.ema.validate_original_weights=True \ # validate the original weights instead of EMA weights.
+        exp_manager.ema.every_n_steps=2 \ # apply EMA every N steps instead of every step.
+        exp_manager.ema.cpu_offload=True # offload EMA weights to CPU. May introduce significant slow-downs.
diff --git a/docs/source/common/intro.rst b/docs/source/common/intro.rst
index 61d160b5c6a5..dbe8d5d17930 100644
--- a/docs/source/common/intro.rst
+++ b/docs/source/common/intro.rst
@@ -3,37 +3,10 @@ Common Collection
 
 The common collection contains things that could be used across all collections.
 
-Tokenizers
-----------
-.. automodule:: nemo.collections.common.tokenizers.AutoTokenizer
-    :special-members: __init__
-.. automodule:: nemo.collections.common.tokenizers.SentencePieceTokenizer
-    :special-members: __init__
-.. automodule:: nemo.collections.common.tokenizers.TokenizerSpec
-    :special-members: __init__
+.. toctree::
+   :maxdepth: 8
 
-
-Losses
-------
-.. automodule:: nemo.collections.common.losses.AggregatorLoss
-    :special-members: __init__
-
-.. automodule:: nemo.collections.common.losses.CrossEntropyLoss
-    :special-members: __init__
-
-.. automodule:: nemo.collections.common.losses.MSELoss
-    :special-members: __init__
-
-.. automodule:: nemo.collections.common.losses.SmoothedCrossEntropyLoss
-    :special-members: __init__
-.. automodule:: nemo.collections.common.losses.SpanningLoss
-    :special-members: __init__
-
-
-Metrics
--------
-
-.. autoclass:: nemo.collections.common.metrics.Perplexity
-    :show-inheritance:
-    :members:
-    :undoc-members:
+   callbacks
+   losses
+   metrics
+   tokenizers
diff --git a/docs/source/common/losses.rst b/docs/source/common/losses.rst
new file mode 100644
index 000000000000..006746face29
--- /dev/null
+++ b/docs/source/common/losses.rst
@@ -0,0 +1,16 @@
+Losses
+------
+.. autoclass:: nemo.collections.common.losses.AggregatorLoss
+    :special-members: __init__
+
+.. autoclass:: nemo.collections.common.losses.CrossEntropyLoss
+    :special-members: __init__
+
+.. autoclass:: nemo.collections.common.losses.MSELoss
+    :special-members: __init__
+
+.. autoclass:: nemo.collections.common.losses.SmoothedCrossEntropyLoss
+    :special-members: __init__
+
+.. autoclass:: nemo.collections.common.losses.SpanningLoss
+    :special-members: __init__
diff --git a/docs/source/common/metrics.rst b/docs/source/common/metrics.rst
new file mode 100644
index 000000000000..a47bd9f6f09b
--- /dev/null
+++ b/docs/source/common/metrics.rst
@@ -0,0 +1,7 @@
+Metrics
+-------
+
+.. autoclass:: nemo.collections.common.metrics.Perplexity
+    :show-inheritance:
+    :members:
+    :undoc-members:
diff --git a/docs/source/common/tokenizers.rst b/docs/source/common/tokenizers.rst
new file mode 100644
index 000000000000..5c7336e8d603
--- /dev/null
+++ b/docs/source/common/tokenizers.rst
@@ -0,0 +1,8 @@
+Tokenizers
+----------
+.. autoclass:: nemo.collections.common.tokenizers.AutoTokenizer
+    :special-members: __init__
+.. autoclass:: nemo.collections.common.tokenizers.SentencePieceTokenizer
+    :special-members: __init__
+.. autoclass:: nemo.collections.common.tokenizers.TokenizerSpec
+    :special-members: __init__
diff --git a/nemo/collections/common/callbacks/ema.py b/nemo/collections/common/callbacks/ema.py
index 49ebbbae040a..0b0b4951a612 100644
--- a/nemo/collections/common/callbacks/ema.py
+++ b/nemo/collections/common/callbacks/ema.py
@@ -19,9 +19,9 @@
 
 import pytorch_lightning as pl
 import torch
-from lightning_utilities.core.rank_zero import rank_zero_info
 from pytorch_lightning import Callback
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
+from pytorch_lightning.utilities.rank_zero import rank_zero_info
 
 
 class EMA(Callback):
diff --git a/requirements/requirements_docs.txt b/requirements/requirements_docs.txt
index 43cb0bb2c185..e9dc73e0ffaf 100644
--- a/requirements/requirements_docs.txt
+++ b/requirements/requirements_docs.txt
@@ -1,3 +1,4 @@
+Jinja2<3.1
 latexcodec
 numpy
 sphinx>=3.0

From 4db6e5e6260672d3d6a3403458a640f4251bad14 Mon Sep 17 00:00:00 2001
From: SeanNaren <snarenthiran@nvidia.com>
Date: Tue, 10 Jan 2023 11:34:34 +0000
Subject: [PATCH 2/2] Address feedback

Signed-off-by: SeanNaren <snarenthiran@nvidia.com>
---
 docs/source/common/callbacks.rst | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/docs/source/common/callbacks.rst b/docs/source/common/callbacks.rst
index 81533488431d..a627e0dd2ca2 100644
--- a/docs/source/common/callbacks.rst
+++ b/docs/source/common/callbacks.rst
@@ -32,25 +32,20 @@ To change the decay rate, pass the additional argument.
 .. code-block:: bash
 
     python examples/asr/asr_ctc/speech_to_text_ctc.py \
-        model.train_ds.manifest_filepath=/path/to/my/train/manifest.json \
-        model.validation_ds.manifest_filepath=/path/to/my/validation/manifest.json \
-        trainer.devices=2 \
-        trainer.accelerator='gpu' \
-        trainer.max_epochs=50 \
+        ...
         exp_manager.ema.enable=True \
         exp_manager.ema.decay=0.999
 
 We also offer other helpful arguments.
 
-.. code-block:: bash
-
-    python examples/asr/asr_ctc/speech_to_text_ctc.py \
-        model.train_ds.manifest_filepath=/path/to/my/train/manifest.json \
-        model.validation_ds.manifest_filepath=/path/to/my/validation/manifest.json \
-        trainer.devices=2 \
-        trainer.accelerator='gpu' \
-        trainer.max_epochs=50 \
-        exp_manager.ema.enable=True \
-        exp_manager.ema.validate_original_weights=True \ # validate the original weights instead of EMA weights.
-        exp_manager.ema.every_n_steps=2 \ # apply EMA every N steps instead of every step.
-        exp_manager.ema.cpu_offload=True # offload EMA weights to CPU. May introduce significant slow-downs.
+.. list-table::
+   :header-rows: 1
+
+   * - Argument
+     - Description
+   * - ``exp_manager.ema.validate_original_weights=True``
+     - Validate the original weights instead of EMA weights.
+   * - ``exp_manager.ema.every_n_steps=2``
+     - Apply EMA every N steps instead of every step.
+   * - ``exp_manager.ema.cpu_offload=True``
+     - Offload EMA weights to CPU. May introduce significant slow-downs.