huggingface · amyeroberts · Feb 12, 2024 · Feb 9, 2024
diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
@@ -3748,11 +3748,13 @@ def _fix_key(key):
                 else:
                     _loaded_keys = loaded_keys
                 not_initialized_submodules = set_initialized_submodules(model, _loaded_keys)
-                # if we're about to tie the output embeds to the input embeds we don't need to init them
+                # If we're about to tie the output embeds to the input embeds we don't need to init them
                 if hasattr(model.config, "tie_word_embeddings") and model.config.tie_word_embeddings:
                     output_embeddings = model.get_output_embeddings()
                     if output_embeddings is not None:
-                        output_embeddings._is_hf_initialized = True
+                        # Still need to initialize if there is a bias term since biases are not tied.
+                        if not hasattr(output_embeddings, "bias") or output_embeddings.bias is None:
+                            output_embeddings._is_hf_initialized = True
             else:
                 not_initialized_submodules = dict(model.named_modules())
             # This will only initialize submodules that are not marked as initialized by the line above.