FIX Correctly determine word embeddings on Deberta #2257

Merged
41 changes: 27 additions & 14 deletions src/peft/peft_model.py
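For context, this hunk lives in PeftModel._setup_prompt_encoder, which runs when a prompt-learning adapter (prompt tuning, p-tuning, prefix tuning) is attached to a base model and needs the backbone's word embeddings. A minimal sketch of how that code path is reached; the checkpoint name and config values below are illustrative assumptions, not part of the PR:

from transformers import AutoModelForSequenceClassification
from peft import PromptTuningConfig, TaskType, get_peft_model

# Load a DeBERTa backbone; any model exposing an "embeddings.word_embeddings" submodule would do.
base_model = AutoModelForSequenceClassification.from_pretrained("microsoft/deberta-v3-base")

# Attaching a prompt-learning adapter triggers _setup_prompt_encoder and, with it, the embedding lookup in the diff below.
peft_config = PromptTuningConfig(task_type=TaskType.SEQ_CLS, num_virtual_tokens=20)
peft_model = get_peft_model(base_model, peft_config)
peft_model.print_trainable_parameters()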
@@ -630,20 +630,33 @@ def _setup_prompt_encoder(self, adapter_name: str):
 if config.num_transformer_submodules is None:
     config.num_transformer_submodules = 2 if config.task_type == TaskType.SEQ_2_SEQ_LM else 1

-for named_param, value in list(transformer_backbone.named_parameters()):
-    # for ZeRO-3, the tensor is sharded across accelerators and deepspeed modifies it to a tensor with shape [0]
-    # the actual unsharded shape is stored in "ds_shape" attribute
-    # special handling is needed in case the model is initialized in deepspeed.zero.Init() context or HfDeepSpeedConfig
-    # has been called before
-    # For reference refer to issue: https://github.com/huggingface/peft/issues/996
-    deepspeed_distributed_tensor_shape = getattr(value, "ds_shape", None)
-
-    if value.shape[0] == self.base_model.config.vocab_size or (
-        deepspeed_distributed_tensor_shape is not None
-        and deepspeed_distributed_tensor_shape[0] == self.base_model.config.vocab_size
-    ):
-        self.word_embeddings = transformer_backbone.get_submodule(named_param.replace(".weight", ""))
-        break
+# determine the word embeddings
+word_embeddings = None
+try:
+    # First try to find the word embeddings based on the module name; this should work for models like Bert,
+    # Roberta, Deberta, etc.
+    word_embeddings = self.base_model.get_submodule("embeddings.word_embeddings")
+except AttributeError:
+    pass
+
+if word_embeddings is None:
+    # Word embeddings could not be determined. Next, try to guess them by checking which parameter has the size
+    # of the vocab.
+    for named_param, value in list(transformer_backbone.named_parameters()):
+        # For ZeRO-3, the tensor is sharded across accelerators and deepspeed modifies it to a tensor with
+        # shape [0]. The actual unsharded shape is stored in the "ds_shape" attribute. Special handling is
+        # needed in case the model is initialized in a deepspeed.zero.Init() context or HfDeepSpeedConfig has
+        # been called before. For reference, see: https://github.com/huggingface/peft/issues/996
+        deepspeed_distributed_tensor_shape = getattr(value, "ds_shape", None)
+
+        if value.shape[0] == self.base_model.config.vocab_size or (
+            deepspeed_distributed_tensor_shape is not None
+            and deepspeed_distributed_tensor_shape[0] == self.base_model.config.vocab_size
+        ):
+            word_embeddings = transformer_backbone.get_submodule(named_param.replace(".weight", ""))
+            break
+
+self.word_embeddings = word_embeddings

 if config.peft_type == PeftType.PROMPT_TUNING:
     prompt_encoder = PromptEmbedding(config, self.word_embeddings)
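The new lookup order can be mirrored outside of PEFT to check which submodule would be picked for a given backbone: resolve the embeddings by the conventional module name first, and only fall back to the vocab-size heuristic when the named lookup fails. A small sketch under the assumption that transformers and a DeBERTa checkpoint are available; the model name is illustrative:

from transformers import AutoModel

model = AutoModel.from_pretrained("microsoft/deberta-v3-base")  # assumed checkpoint

word_embeddings = None
try:
    # Named lookup: works for BERT-, RoBERTa- and DeBERTa-style backbones that expose this path.
    word_embeddings = model.get_submodule("embeddings.word_embeddings")
except AttributeError:
    pass

if word_embeddings is None:
    # Fallback: pick the parameter whose leading dimension matches the vocabulary size.
    for name, param in model.named_parameters():
        if param.shape[0] == model.config.vocab_size:
            word_embeddings = model.get_submodule(name.replace(".weight", ""))
            break

print(type(word_embeddings).__name__, tuple(word_embeddings.weight.shape))

In the patched code the same two-step lookup runs inside _setup_prompt_encoder (named lookup on self.base_model, vocab-size fallback on transformer_backbone), so Deberta-style models resolve their word embeddings by name instead of depending on the vocab-size match.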