Support O2 training of PEFT and SFT #7971

Merged: 2 commits, Dec 5, 2023
nemo/collections/nlp/parts/mixins/nlp_adapter_mixins.py (4 changes: 2 additions & 2 deletions)
```diff
@@ -67,7 +67,7 @@ def __init__(self, *args, **kwargs):
         self.use_ptuning_only = False
         super().__init__(*args, **kwargs)
         if hasattr(self, "enc_dec_model"):
-            self.model_prefix = "enc_dec_model."  # for T5
+            self.model_prefix = "enc_dec_model.module." if self.cfg.megatron_amp_O2 else "enc_dec_model."  # for T5
         else:
             self.model_prefix = "model.module." if self.cfg.megatron_amp_O2 else "model."
```
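
For context on this hunk: with `megatron_amp_O2` enabled, NeMo wraps the model in a float16 wrapper that holds the underlying model under a `module` attribute, so every state-dict key gains an extra `module.` segment and the prefix has to match. A minimal sketch of that key-shifting effect; `Float16Wrapper` below is a hypothetical stand-in for illustration, not NeMo's actual wrapper (which also handles dtype casting):

```python
import torch.nn as nn

class Float16Wrapper(nn.Module):
    """Stand-in for an O2-style wrapper (hypothetical, illustration only)."""

    def __init__(self, module: nn.Module):
        super().__init__()
        self.module = module  # wrapped model lives under the attribute "module"

inner = nn.Linear(4, 4)
print(list(inner.state_dict().keys()))                  # ['weight', 'bias']
print(list(Float16Wrapper(inner).state_dict().keys()))  # ['module.weight', 'module.bias']
```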

```diff
@@ -351,7 +351,7 @@ def sharded_state_dict(self, prefix: str = ''):
         if not use_mcore_gpt or (self.use_peft and self.setup_complete):
             return None
         else:
-            return self.model.sharded_state_dict(prefix=self.model_prefix)
+            return super().sharded_state_dict(prefix=prefix)

     def load_state_dict(self, state_dict, strict: bool = True):
         if len(state_dict) == 0:
```
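
The second hunk stops hard-coding `self.model_prefix` and defers to the parent class, presumably so the prefix handling (including any O2 `module.` nesting) lives in one place rather than being duplicated in the mixin. A minimal sketch of that delegation pattern, with hypothetical class names that are not NeMo code:

```python
class BaseModel:
    """Stand-in for the base Megatron model class (hypothetical)."""

    def sharded_state_dict(self, prefix: str = ''):
        # The base class is the single place that assembles key
        # prefixes (e.g. inserting "module." when O2 is enabled).
        return {f"{prefix}weight": "tensor"}

class AdapterMixin(BaseModel):
    use_peft = True
    setup_complete = True

    def sharded_state_dict(self, prefix: str = ''):
        if self.use_peft and self.setup_complete:
            return None  # adapter weights are checkpointed separately
        # Defer to the base class instead of reimplementing prefix logic.
        return super().sharded_state_dict(prefix=prefix)

print(AdapterMixin().sharded_state_dict(prefix='model.'))  # None in the PEFT case
```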