Skip to content

Commit

Permalink
Fix tie_word_embeddings handling for GGUF models (#35085)
Browse files Browse the repository at this point in the history
* fix tie_word_embeddings

Signed-off-by: Isotr0py <2037008807@qq.com>

* fix

Signed-off-by: Isotr0py <2037008807@qq.com>

---------

Signed-off-by: Isotr0py <2037008807@qq.com>
  • Loading branch information
Isotr0py authored Dec 5, 2024
1 parent 3544705 commit 482cb28
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion src/transformers/modeling_gguf_pytorch_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,6 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False):
# FIXME: Currently this implementation only supports the flan-t5 architecture.
# It needs to be extended to support legacy t5.
elif "t5" in architecture or "t5encoder" in architecture:
parsed_parameters["config"]["tie_word_embeddings"] = False
parsed_parameters["config"]["is_gated_act"] = True
updated_architecture = "t5"
else:
Expand Down Expand Up @@ -326,6 +325,12 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False):
if architecture + model_size not in GGUF_SUPPORTED_ARCHITECTURES:
raise ValueError(f"Architecture {architecture + model_size} not supported")

# Handle tie_word_embeddings: if no tensor named "output.weight" (the lm_head weight)
# is present in the GGUF tensors, tie_word_embeddings is True; otherwise it is False.
parsed_parameters["config"]["tie_word_embeddings"] = all(
"output.weight" != tensor.name for tensor in reader.tensors
)

# List all key-value pairs in a columnized format
for gguf_key, field in reader.fields.items():
gguf_key = gguf_key.replace(architecture, updated_architecture)
Expand Down

0 comments on commit 482cb28

Please sign in to comment.