NVIDIA · ericharper · Dec 7, 2023 · Nov 24, 2023
diff --git a/nemo/collections/nlp/parts/utils_funcs.py b/nemo/collections/nlp/parts/utils_funcs.py
@@ -12,7 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__all__ = ['list2str', 'tensor2list', 'plot_confusion_matrix', 'get_classification_report']
+__all__ = [
+    'torch_dtype_from_precision',
+    'list2str',
+    'tensor2list',
+    'plot_confusion_matrix',
+    'get_classification_report',
+]
 
 import os
 import time

diff --git a/scripts/nlp_language_modeling/convert_hf_llama_to_nemo.py b/scripts/nlp_language_modeling/convert_hf_llama_to_nemo.py
@@ -41,6 +41,7 @@
     NLPSaveRestoreConnector,
     PipelineMixedPrecisionPlugin,
 )
+from nemo.collections.nlp.parts.utils_funcs import torch_dtype_from_precision
 from nemo.utils import logging
 
 
@@ -170,15 +171,6 @@ def convert(args):
         else:
             plugins.append(PipelineMixedPrecisionPlugin(precision=plugin_precision, device='cuda', scaler=scaler))
 
-    if precision == 32:
-        dtype = torch.float32
-    elif precision in [16, "16", "16-mixed"]:
-        dtype = torch.float16
-    elif precision in ["bf16", "bf16-mixed"]:
-        dtype = torch.bfloat16
-    else:
-        dtype = torch.float32  # fallback
-
     nemo_config.precision = precision
     print(f"nemo_config: {nemo_config}")
 
@@ -315,6 +307,7 @@ def convert(args):
     model._save_restore_connector = NLPSaveRestoreConnector()
 
     # cast to target precision and disable cpu init
+    dtype = torch_dtype_from_precision(precision)
     model = model.to(dtype=dtype)
     model.cfg.use_cpu_initialization = False