6 changes: 5 additions & 1 deletion vllm/lora/models.py
@@ -337,7 +337,11 @@ def __init__(
         self.packed_modules_mapping = copy.deepcopy(
             self.model.packed_modules_mapping)
         # Used to indicate whether the model is a multimodal model
-        self.supports_mm: bool = supports_multimodal(self.model)
+        self.supports_mm: bool = (
+            supports_multimodal(self.model)
+            # In case the model only supports LoRA for
+            # text modules (e.g. ChatGLM)
+            and hasattr(self.model, "get_mm_mapping"))
         self.packed_modules: Dict[str, List[str]] = {}
         self.modules: Dict[str, "BaseLayerWithLoRA"] = {}
         # Dict instead of a Set for compatibility with LRUCache.
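
For context, a minimal sketch of what the new guard means in practice, using toy classes rather than vLLM's real model interfaces: a model counts as multimodal for LoRA purposes only if it also exposes get_mm_mapping, so a multimodal model that supports LoRA only for its text modules (e.g. ChatGLM) takes the text-only path.

# Minimal sketch of the new guard; toy classes stand in for real vLLM models.

class TextOnlyLoRAMultimodalModel:
    """Multimodal model that only supports LoRA on its text modules."""
    supports_multimodal = True          # stand-in for vLLM's interface check


class FullMultimodalLoRAModel:
    """Multimodal model whose submodules are all LoRA-capable."""
    supports_multimodal = True

    def get_mm_mapping(self):
        # Maps multimodal submodule kinds to module prefixes (toy values).
        return {"connector": "resampler", "tower": "vpm"}


def lora_manager_supports_mm(model) -> bool:
    # Mirrors the change above: multimodal support alone is no longer enough;
    # the model must also describe its multimodal module mapping.
    return getattr(model, "supports_multimodal", False) and hasattr(
        model, "get_mm_mapping")


assert lora_manager_supports_mm(FullMultimodalLoRAModel())
assert not lora_manager_supports_mm(TextOnlyLoRAMultimodalModel())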
29 changes: 13 additions & 16 deletions vllm/model_executor/models/minicpm.py
@@ -474,17 +474,18 @@ def __init__(
         unpadded_vocab_size = config.vocab_size
         if lora_config:
             unpadded_vocab_size += lora_config.lora_extra_vocab_size
-        if not self.config.tie_word_embeddings:
-            self.lm_head = ParallelLMHead(
-                unpadded_vocab_size,
-                config.hidden_size,
-                org_num_embeddings=config.vocab_size,
-                padding_size=DEFAULT_VOCAB_PADDING_SIZE
-                # We need bigger padding if using lora for kernel
-                # compatibility
-                if not lora_config else lora_config.lora_vocab_padding_size,
-                quant_config=quant_config,
-            )
+        self.lm_head = ParallelLMHead(
+            unpadded_vocab_size,
+            config.hidden_size,
+            org_num_embeddings=config.vocab_size,
+            padding_size=DEFAULT_VOCAB_PADDING_SIZE
+            # We need bigger padding if using lora for kernel
+            # compatibility
+            if not lora_config else lora_config.lora_vocab_padding_size,
+            quant_config=quant_config,
+        )
+        if config.tie_word_embeddings:
+            self.lm_head = self.lm_head.tie_weights(self.model.embed_tokens)
         self.scale_width = self.config.hidden_size / self.config.dim_model_base
 
         self.logits_processor = LogitsProcessor(unpadded_vocab_size,
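
For context on the padding_size argument kept above, a small sketch of the rounding it controls. The values 64 and 256 are assumed here as the usual vLLM defaults for DEFAULT_VOCAB_PADDING_SIZE and lora_vocab_padding_size, and the vocabulary sizes are illustrative numbers, not MiniCPM's actual configuration.

def pad_vocab_size(vocab_size: int, pad_to: int) -> int:
    # Round the vocabulary size up to a multiple of `pad_to`.
    return ((vocab_size + pad_to - 1) // pad_to) * pad_to


base_vocab = 122_753          # illustrative unpadded vocab size
lora_extra = 256              # extra rows reserved for LoRA-added tokens

print(pad_vocab_size(base_vocab, 64))                # padding without LoRA
print(pad_vocab_size(base_vocab + lora_extra, 256))  # bigger padding with LoRA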
@@ -517,11 +518,7 @@ def compute_logits(
         sampling_metadata: SamplingMetadata,
     ) -> Optional[torch.Tensor]:
         hidden_states = hidden_states / self.scale_width
-        if self.config.tie_word_embeddings:
-            lm_head = self.model.embed_tokens
-        else:
-            lm_head = self.lm_head
-        logits = self.logits_processor(lm_head, hidden_states,
+        logits = self.logits_processor(self.lm_head, hidden_states,
                                        sampling_metadata)
         return logits
 
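The tie_weights call added in __init__ is what lets compute_logits above drop its branch on tie_word_embeddings: the head is tied once at construction time, so callers always go through self.lm_head. A plain-PyTorch sketch of the same idea (not vLLM's ParallelLMHead, just an illustration of sharing the embedding matrix with the LM head):

import torch
import torch.nn as nn


class TinyTiedLM(nn.Module):
    # Toy model: the LM head reuses the embedding matrix when tying is on.

    def __init__(self, vocab_size: int, hidden_size: int, tie: bool):
        super().__init__()
        self.embed_tokens = nn.Embedding(vocab_size, hidden_size)
        self.lm_head = nn.Linear(hidden_size, vocab_size, bias=False)
        if tie:
            # After tying, callers can always go through self.lm_head,
            # mirroring the simplified compute_logits above.
            self.lm_head.weight = self.embed_tokens.weight

    def compute_logits(self, hidden_states: torch.Tensor) -> torch.Tensor:
        return self.lm_head(hidden_states)


model = TinyTiedLM(vocab_size=128, hidden_size=16, tie=True)
assert model.lm_head.weight.data_ptr() == model.embed_tokens.weight.data_ptr()
logits = model.compute_logits(torch.zeros(1, 16))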
22 changes: 22 additions & 0 deletions vllm/model_executor/models/minicpm3.py
@@ -216,6 +216,28 @@ def _init_layers(
 
 
 class MiniCPM3ForCausalLM(MiniCPMForCausalLM):
+    packed_modules_mapping = {
+        "gate_up_proj": [
+            "gate_proj",
+            "up_proj",
+        ],
+    }
+
+    # LoRA specific attributes
+    supported_lora_modules = [
+        "kv_a_proj_with_mqa",
+        "q_a_proj",
+        "q_b_proj",
+        "kv_b_proj",
+        "o_proj",
+        "gate_up_proj",
+        "down_proj",
+        "embed_tokens",
+        "lm_head",
+    ]
+
+    # `embedding_modules` and `embedding_padding_modules`
+    # are inherited from MiniCPMForCausalLM
 
     def _init_model(self):
         self.model = MiniCPM3Model(config=self.config,
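
A rough sketch of how a LoRA loader typically consumes these class attributes; this is simplified toy code under that assumption, not vLLM's actual loading logic. A packed target such as gate_up_proj expands into the per-projection names a LoRA checkpoint actually contains, and anything outside supported_lora_modules is rejected.

from typing import Dict, List

# Copied from the class attributes above.
packed_modules_mapping: Dict[str, List[str]] = {
    "gate_up_proj": ["gate_proj", "up_proj"],
}
supported_lora_modules = [
    "kv_a_proj_with_mqa", "q_a_proj", "q_b_proj", "kv_b_proj", "o_proj",
    "gate_up_proj", "down_proj", "embed_tokens", "lm_head",
]


def expand_lora_targets(modules: List[str]) -> List[str]:
    # Toy helper: reject unsupported modules and unpack fused projections
    # into the names a LoRA checkpoint would actually contain.
    targets: List[str] = []
    for name in modules:
        if name not in supported_lora_modules:
            raise ValueError(f"LoRA is not supported for module {name!r}")
        targets.extend(packed_modules_mapping.get(name, [name]))
    return targets


print(expand_lora_targets(["q_a_proj", "gate_up_proj", "lm_head"]))
# ['q_a_proj', 'gate_proj', 'up_proj', 'lm_head']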