feat(train): add note
xingchensong committed Nov 29, 2023
1 parent 25705d0 commit 4d35b5a
Showing 2 changed files with 8 additions and 0 deletions.
4 changes: 4 additions & 0 deletions wenet/transformer/decoder.py
@@ -133,6 +133,10 @@ def forward(
     vocab_size) if use_output_layer is True,
     torch.tensor(0.0), in order to unify api with bidirectional decoder
     olens: (batch, )
+    NOTE(xcsong):
+        We pass the `__call__` method of the modules instead of `forward` to the
+        checkpointing API because `__call__` attaches all the hooks of the module.
+        https://discuss.pytorch.org/t/any-different-between-model-input-and-model-forward-input/3690/2
     """
     tgt = ys_in_pad
     maxlen = tgt.size(1)
4 changes: 4 additions & 0 deletions wenet/transformer/encoder.py
@@ -129,6 +129,10 @@ def forward(
     xs: padded output tensor (B, T' ~= T/subsample_rate, D)
     masks: torch.Tensor batch padding mask after subsample
         (B, 1, T' ~= T/subsample_rate)
+    NOTE(xcsong):
+        We pass the `__call__` method of the modules instead of `forward` to the
+        checkpointing API because `__call__` attaches all the hooks of the module.
+        https://discuss.pytorch.org/t/any-different-between-model-input-and-model-forward-input/3690/2
     """
     T = xs.size(1)
     masks = ~make_pad_mask(xs_lens, T).unsqueeze(1)  # (B, 1, T)
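The behavior the note describes is easy to verify with a small standalone sketch (not part of this commit). Assuming the checkpointing API in question is torch.utils.checkpoint.checkpoint, passing the bound `forward` method bypasses registered forward hooks, while passing `__call__` dispatches through PyTorch's hook machinery and runs them:

    # Minimal sketch: hooks fire only when a module is invoked via __call__,
    # not when its forward() is called directly.
    import torch
    from torch.utils.checkpoint import checkpoint

    layer = torch.nn.Linear(4, 4)
    layer.register_forward_hook(lambda mod, inp, out: print("hook fired"))

    x = torch.randn(2, 4, requires_grad=True)

    # Bypasses the hook: checkpoint invokes forward() directly.
    y1 = checkpoint(layer.forward, x, use_reentrant=False)   # prints nothing

    # Runs the hook: __call__ attaches all the hooks of the module.
    y2 = checkpoint(layer.__call__, x, use_reentrant=False)  # prints "hook fired"

This is why the checkpointed layer loops that these docstrings document pass `layer.__call__` rather than `layer.forward`.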
