Minor FP16 fixes when converting old checkpoints (#3514)

* Be robust to loading old checkpoints with apex scalers
* Minor fp16 fixes
* Vacuum instead
facebookresearch · Mar 22, 2021 · 35b3156 · 35b3156
1 parent 4a55d3d
commit 35b3156
Show file tree

Hide file tree

Showing 3 changed files with 11 additions and 2 deletions.
diff --git a/parlai/agents/bart/convert_fairseq_to_parlai.py b/parlai/agents/bart/convert_fairseq_to_parlai.py
@@ -28,6 +28,7 @@
 from parlai.core.params import ParlaiParser
 from parlai.core.script import ParlaiScript
 from parlai.utils.io import PathManager
+from parlai.scripts.vacuum import Vacuum
 
 
 TRANSFORMER_PARAMETER_MAPPING = {
@@ -133,6 +134,8 @@ def run(self):
         self.agent.model.load_state_dict(converted, True)
         self.agent.opt.pop('converting', None)
         self.agent.save(self.opt['output'])
+        # kill the optimizer
+        Vacuum.main(model_file=self.opt['output'], no_backup=True)
         # 4. enjoy!
         self.print_agent_act()
 

diff --git a/parlai/utils/fp16.py b/parlai/utils/fp16.py
@@ -152,7 +152,11 @@ def load_state_dict(self, state_dict):
         (e.g., learning rate) over that found in the state_dict. This allows us to
         resume training from a checkpoint using a new set of optimizer args.
         """
-        if 'loss_scaler' in state_dict and self.scaler is not None:
+        if (
+            'loss_scaler' in state_dict
+            and self.scaler is not None
+            and isinstance(state_dict['loss_scaler'], float)
+        ):
             self.scaler.loss_scale = state_dict['loss_scaler']
         self.optimizer.load_state_dict(state_dict)
 

diff --git a/parlai/zoo/bart/build.py b/parlai/zoo/bart/build.py
@@ -46,7 +46,9 @@
 }
 
 
-def download(datapath, version='v1.0'):
+def download(datapath, version='v1.1'):
+    # v1.0: initial release
+    # v1.1: change the datatype in conversion for a lighter model file
     dpath = os.path.join(datapath, 'models', 'bart')
 
     if not build_data.built(dpath, version):