Skip to content

Commit

Permalink
[Bugfix][Model] Skip loading lm_head weights if using tie_word_embedd…
Browse files Browse the repository at this point in the history
…ings (vllm-project#6758)

Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com>
Signed-off-by: Alvant <alvasian@yandex.ru>
  • Loading branch information
tjohnson31415 authored and Alvant committed Oct 26, 2024
1 parent 58ec11c commit 477a006
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 1 deletion.
7 changes: 7 additions & 0 deletions vllm/model_executor/models/chameleon.py
Original file line number Diff line number Diff line change
Expand Up @@ -998,6 +998,13 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
# Models trained using ColossalAI may include these tensors in
# the checkpoint. Skip them.
continue

# With tie_word_embeddings, we can skip lm_head.weight
# The weight might appear unnecessarily in the files if the model is
# processed with quantization, LoRA, fine-tuning, etc.
if self.config.tie_word_embeddings and "lm_head.weight" in name:
continue

use_default_weight_loading = False
if "vqmodel" in name:
if self.model.vqmodel is not None:
Expand Down
5 changes: 5 additions & 0 deletions vllm/model_executor/models/llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,11 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
# Models trained using ColossalAI may include these tensors in
# the checkpoint. Skip them.
continue
# With tie_word_embeddings, we can skip lm_head.weight
# The weight might appear unnecessarily in the files if the model is
# processed with quantization, LoRA, fine-tuning, etc.
if self.config.tie_word_embeddings and "lm_head.weight" in name:
continue
if scale_name := get_compressed_tensors_cache_scale(name):
# Loading kv cache scales for compressed-tensors quantization
param = params_dict[scale_name]
Expand Down
6 changes: 5 additions & 1 deletion vllm/model_executor/models/minicpm.py
Original file line number Diff line number Diff line change
Expand Up @@ -514,7 +514,11 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
# Models trained using ColossalAI may include these tensors in
# the checkpoint. Skip them.
continue

# With tie_word_embeddings, we can skip lm_head.weight
# The weight might appear unnecessarily in the files if the model is
# processed with quantization, LoRA, fine-tuning, etc.
if self.config.tie_word_embeddings and "lm_head.weight" in name:
continue
for (param_name, weight_name, shard_id) in stacked_params_mapping:
if weight_name not in name:
continue
Expand Down
5 changes: 5 additions & 0 deletions vllm/model_executor/models/olmo.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,11 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
# Models trained using ColossalAI may include these tensors in
# the checkpoint. Skip them.
continue
# With tie_word_embeddings, we can skip lm_head.weight
# The weight might appear unnecessarily in the files if the model is
# processed with quantization, LoRA, fine-tuning, etc.
if self.config.tie_word_embeddings and "lm_head.weight" in name:
continue
for (param_name, weight_name, shard_id) in stacked_params_mapping:
if weight_name not in name:
continue
Expand Down

0 comments on commit 477a006

Please sign in to comment.