From 43c22b18bc99af07da1ecb8c9590d934a93e1d23 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Mon, 9 Jan 2023 13:23:02 +0000 Subject: [PATCH 1/2] Add EMA docs, fix docs due to incorrect import, fix doc format for common collection Signed-off-by: SeanNaren --- docs/source/common/callbacks.rst | 56 ++++++++++++++++++++++++ docs/source/common/intro.rst | 39 +++-------------- docs/source/common/losses.rst | 16 +++++++ docs/source/common/metrics.rst | 7 +++ docs/source/common/tokenizers.rst | 8 ++++ nemo/collections/common/callbacks/ema.py | 2 +- requirements/requirements_docs.txt | 1 + 7 files changed, 95 insertions(+), 34 deletions(-) create mode 100644 docs/source/common/callbacks.rst create mode 100644 docs/source/common/losses.rst create mode 100644 docs/source/common/metrics.rst create mode 100644 docs/source/common/tokenizers.rst diff --git a/docs/source/common/callbacks.rst b/docs/source/common/callbacks.rst new file mode 100644 index 000000000000..81533488431d --- /dev/null +++ b/docs/source/common/callbacks.rst @@ -0,0 +1,56 @@ +********* +Callbacks +********* + +Exponential Moving Average (EMA) +================================ + +During training, EMA maintains a moving average of the trained parameters. +EMA parameters can produce significantly better results and faster convergence for a variety of different domains and models. + +EMA is a simple calculation. The EMA weights are initialized with the model weights at the start of training. + +At every training update, the EMA weights are updated based on the new model weights. + +.. math:: + \text{ema}_w = \text{ema}_w \cdot \text{decay} + \text{model}_w \cdot (1 - \text{decay}) + +Enabling EMA is straightforward. Pass the additional argument shown below to the experiment manager at runtime. + +.. 
code-block:: bash + + python examples/asr/asr_ctc/speech_to_text_ctc.py \ + model.train_ds.manifest_filepath=/path/to/my/train/manifest.json \ + model.validation_ds.manifest_filepath=/path/to/my/validation/manifest.json \ + trainer.devices=2 \ + trainer.accelerator='gpu' \ + trainer.max_epochs=50 \ + exp_manager.ema.enable=True # pass this additional argument to enable EMA + +To change the decay rate, pass the additional argument. + +.. code-block:: bash + + python examples/asr/asr_ctc/speech_to_text_ctc.py \ + model.train_ds.manifest_filepath=/path/to/my/train/manifest.json \ + model.validation_ds.manifest_filepath=/path/to/my/validation/manifest.json \ + trainer.devices=2 \ + trainer.accelerator='gpu' \ + trainer.max_epochs=50 \ + exp_manager.ema.enable=True \ + exp_manager.ema.decay=0.999 + +We also offer other helpful arguments. + +.. code-block:: bash + + python examples/asr/asr_ctc/speech_to_text_ctc.py \ + model.train_ds.manifest_filepath=/path/to/my/train/manifest.json \ + model.validation_ds.manifest_filepath=/path/to/my/validation/manifest.json \ + trainer.devices=2 \ + trainer.accelerator='gpu' \ + trainer.max_epochs=50 \ + exp_manager.ema.enable=True \ + exp_manager.ema.validate_original_weights=True \ # validate the original weights instead of EMA weights. + exp_manager.ema.every_n_steps=2 \ # apply EMA every N steps instead of every step. + exp_manager.ema.cpu_offload=True # offload EMA weights to CPU. May introduce significant slow-downs. diff --git a/docs/source/common/intro.rst b/docs/source/common/intro.rst index 61d160b5c6a5..dbe8d5d17930 100644 --- a/docs/source/common/intro.rst +++ b/docs/source/common/intro.rst @@ -3,37 +3,10 @@ Common Collection The common collection contains things that could be used across all collections. -Tokenizers ----------- -.. automodule:: nemo.collections.common.tokenizers.AutoTokenizer - :special-members: __init__ -.. 
automodule:: nemo.collections.common.tokenizers.SentencePieceTokenizer - :special-members: __init__ -.. automodule:: nemo.collections.common.tokenizers.TokenizerSpec - :special-members: __init__ +.. toctree:: + :maxdepth: 8 - -Losses ------- -.. automodule:: nemo.collections.common.losses.AggregatorLoss - :special-members: __init__ - -.. automodule:: nemo.collections.common.losses.CrossEntropyLoss - :special-members: __init__ - -.. automodule:: nemo.collections.common.losses.MSELoss - :special-members: __init__ - -.. automodule:: nemo.collections.common.losses.SmoothedCrossEntropyLoss - :special-members: __init__ -.. automodule:: nemo.collections.common.losses.SpanningLoss - :special-members: __init__ - - -Metrics -------- - -.. autoclass:: nemo.collections.common.metrics.Perplexity - :show-inheritance: - :members: - :undoc-members: + callbacks + losses + metrics + tokenizers diff --git a/docs/source/common/losses.rst b/docs/source/common/losses.rst new file mode 100644 index 000000000000..006746face29 --- /dev/null +++ b/docs/source/common/losses.rst @@ -0,0 +1,16 @@ +Losses +------ +.. autoclass:: nemo.collections.common.losses.AggregatorLoss + :special-members: __init__ + +.. autoclass:: nemo.collections.common.losses.CrossEntropyLoss + :special-members: __init__ + +.. autoclass:: nemo.collections.common.losses.MSELoss + :special-members: __init__ + +.. autoclass:: nemo.collections.common.losses.SmoothedCrossEntropyLoss + :special-members: __init__ + +.. autoclass:: nemo.collections.common.losses.SpanningLoss + :special-members: __init__ diff --git a/docs/source/common/metrics.rst b/docs/source/common/metrics.rst new file mode 100644 index 000000000000..a47bd9f6f09b --- /dev/null +++ b/docs/source/common/metrics.rst @@ -0,0 +1,7 @@ +Metrics +------- + +.. 
autoclass:: nemo.collections.common.metrics.Perplexity + :show-inheritance: + :members: + :undoc-members: diff --git a/docs/source/common/tokenizers.rst b/docs/source/common/tokenizers.rst new file mode 100644 index 000000000000..5c7336e8d603 --- /dev/null +++ b/docs/source/common/tokenizers.rst @@ -0,0 +1,8 @@ +Tokenizers +---------- +.. autoclass:: nemo.collections.common.tokenizers.AutoTokenizer + :special-members: __init__ +.. autoclass:: nemo.collections.common.tokenizers.SentencePieceTokenizer + :special-members: __init__ +.. autoclass:: nemo.collections.common.tokenizers.TokenizerSpec + :special-members: __init__ diff --git a/nemo/collections/common/callbacks/ema.py b/nemo/collections/common/callbacks/ema.py index 49ebbbae040a..0b0b4951a612 100644 --- a/nemo/collections/common/callbacks/ema.py +++ b/nemo/collections/common/callbacks/ema.py @@ -19,9 +19,9 @@ import pytorch_lightning as pl import torch -from lightning_utilities.core.rank_zero import rank_zero_info from pytorch_lightning import Callback from pytorch_lightning.utilities.exceptions import MisconfigurationException +from pytorch_lightning.utilities.rank_zero import rank_zero_info class EMA(Callback): diff --git a/requirements/requirements_docs.txt b/requirements/requirements_docs.txt index 43cb0bb2c185..e9dc73e0ffaf 100644 --- a/requirements/requirements_docs.txt +++ b/requirements/requirements_docs.txt @@ -1,3 +1,4 @@ +Jinja2<3.1 latexcodec numpy sphinx>=3.0 From 4db6e5e6260672d3d6a3403458a640f4251bad14 Mon Sep 17 00:00:00 2001 From: SeanNaren Date: Tue, 10 Jan 2023 11:34:34 +0000 Subject: [PATCH 2/2] Address feedback Signed-off-by: SeanNaren --- docs/source/common/callbacks.rst | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/docs/source/common/callbacks.rst b/docs/source/common/callbacks.rst index 81533488431d..a627e0dd2ca2 100644 --- a/docs/source/common/callbacks.rst +++ b/docs/source/common/callbacks.rst @@ -32,25 +32,20 @@ To change the decay 
rate, pass the additional argument. .. code-block:: bash python examples/asr/asr_ctc/speech_to_text_ctc.py \ - model.train_ds.manifest_filepath=/path/to/my/train/manifest.json \ - model.validation_ds.manifest_filepath=/path/to/my/validation/manifest.json \ - trainer.devices=2 \ - trainer.accelerator='gpu' \ - trainer.max_epochs=50 \ + ... exp_manager.ema.enable=True \ exp_manager.ema.decay=0.999 We also offer other helpful arguments. -.. code-block:: bash - - python examples/asr/asr_ctc/speech_to_text_ctc.py \ - model.train_ds.manifest_filepath=/path/to/my/train/manifest.json \ - model.validation_ds.manifest_filepath=/path/to/my/validation/manifest.json \ - trainer.devices=2 \ - trainer.accelerator='gpu' \ - trainer.max_epochs=50 \ - exp_manager.ema.enable=True \ - exp_manager.ema.validate_original_weights=True \ # validate the original weights instead of EMA weights. - exp_manager.ema.every_n_steps=2 \ # apply EMA every N steps instead of every step. - exp_manager.ema.cpu_offload=True # offload EMA weights to CPU. May introduce significant slow-downs. +.. list-table:: + :header-rows: 1 + + * - Argument + - Description + * - ``exp_manager.ema.validate_original_weights=True`` + - Validate the original weights instead of EMA weights. + * - ``exp_manager.ema.every_n_steps=2`` + - Apply EMA every N steps instead of every step. + * - ``exp_manager.ema.cpu_offload=True`` + - Offload EMA weights to CPU. May introduce significant slow-downs.