Support O2 training of PEFT and SFT (NVIDIA#7971)
* support O2

Signed-off-by: Chen Cui <chcui@nvidia.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: Chen Cui <chcui@nvidia.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
cuichenx and pre-commit-ci[bot] authored Dec 5, 2023
1 parent 4e2ed33 · commit 2474dd0
Showing 1 changed file with 2 additions and 2 deletions.
nemo/collections/nlp/parts/mixins/nlp_adapter_mixins.py (2 additions, 2 deletions)
@@ -67,7 +67,7 @@ def __init__(self, *args, **kwargs):
         self.use_ptuning_only = False
         super().__init__(*args, **kwargs)
         if hasattr(self, "enc_dec_model"):
-            self.model_prefix = "enc_dec_model."  # for T5
+            self.model_prefix = "enc_dec_model.module." if self.cfg.megatron_amp_O2 else "enc_dec_model."  # for T5
         else:
             self.model_prefix = "model.module." if self.cfg.megatron_amp_O2 else "model."

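Why the prefix changes: with megatron_amp_O2 enabled, the underlying Megatron module is held behind a mixed-precision wrapper that exposes it as a .module attribute, so every state_dict key gains an extra "module." segment. The snippet below is an illustrative sketch only (toy classes, not NeMo's actual O2 wrapper) showing how that extra level of nesting shifts the key prefix.

# Illustrative sketch only (toy classes, not NeMo's real O2 wrapper): the real
# submodule lives behind the wrapper's .module attribute, so state_dict keys
# pick up an extra "module." segment and the prefix above must account for it.
import torch.nn as nn


class O2Wrapper(nn.Module):
    """Stand-in for a mixed-precision (O2) wrapper holding the model as .module."""

    def __init__(self, module: nn.Module):
        super().__init__()
        self.module = module


class Host(nn.Module):
    """Stand-in for a model that may or may not be O2-wrapped."""

    def __init__(self, use_o2: bool):
        super().__init__()
        core = nn.Linear(4, 4)  # stands in for the T5 encoder-decoder
        self.enc_dec_model = O2Wrapper(core) if use_o2 else core


print(list(Host(use_o2=False).state_dict()))  # ['enc_dec_model.weight', 'enc_dec_model.bias']
print(list(Host(use_o2=True).state_dict()))   # ['enc_dec_model.module.weight', 'enc_dec_model.module.bias']
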
@@ -351,7 +351,7 @@ def sharded_state_dict(self, prefix: str = ''):
         if not use_mcore_gpt or (self.use_peft and self.setup_complete):
             return None
         else:
-            return self.model.sharded_state_dict(prefix=self.model_prefix)
+            return super().sharded_state_dict(prefix=prefix)
 
     def load_state_dict(self, state_dict, strict: bool = True):
         if len(state_dict) == 0:
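
The second hunk defers to the parent class's sharded_state_dict instead of reaching into self.model directly, which keeps the O2 vs. non-O2 prefix handling in one place. Below is an illustrative sketch only of that override pattern (toy classes, not the real NeMo mixin): opt out of sharded checkpointing once PEFT setup is complete, otherwise delegate to the parent.

# Illustrative sketch only (toy classes, not the NeMo mixin or Megatron model):
# the override returns None to skip sharded checkpointing for a finished PEFT
# setup, and otherwise delegates to super(), which already knows whether the
# model is O2-wrapped and therefore which key prefix applies.
class ParentModel:
    def sharded_state_dict(self, prefix: str = ''):
        # the parent handles wrapped vs. unwrapped modules internally
        return {f"{prefix}weight": "tensor-placeholder"}


class AdapterMixin(ParentModel):
    def __init__(self, use_peft: bool, setup_complete: bool):
        self.use_peft = use_peft
        self.setup_complete = setup_complete

    def sharded_state_dict(self, prefix: str = ''):
        if self.use_peft and self.setup_complete:
            return None  # fall back to regular (non-sharded) checkpoint saving
        return super().sharded_state_dict(prefix=prefix)


print(AdapterMixin(use_peft=True, setup_complete=True).sharded_state_dict())    # None
print(AdapterMixin(use_peft=False, setup_complete=False).sharded_state_dict())  # {'weight': 'tensor-placeholder'}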